We look for genes associated with fibroblast activation in several single-cell datasets.
suppressPackageStartupMessages({
# install.packages("BiocManager")
# install.packages("kableExtra")
requireNamespace("kableExtra")
# ?
# devtools::install_github("rstudio/crosstalk")
# library("crosstalk")
# library("htmlwidgets")
# install.packages("crunch")
# requireNamespace("crunch")
# https://rstudio.github.io/DT/
# install.packages("DT")
requireNamespace("DT")
# install.packages("RCurl")
requireNamespace("RCurl")
# install.packages("memoise")
requireNamespace("memoise")
# install.packages("dplyr")
library(dplyr)
# install.packages("ggplot2")
library(ggplot2)
# install.packages("ggsci")
requireNamespace("ggsci")
# install.packages("ggtext")
requireNamespace("ggtext")
# install.packages("ggrepel")
requireNamespace("ggrepel")
# install.packages("assertthat")
requireNamespace("assertthat")
#
requireNamespace("magrittr")
`%<>%` <- magrittr::`%<>%`
`%>%` <- magrittr::`%>%`
# install.packages("glue")
requireNamespace("glue")
glue <- glue::glue
# install.packages("tidyr")
requireNamespace("tidyr")
# BiocManager::install("org.Mm.eg.db")
# BiocManager::install("org.Hs.eg.db")
requireNamespace("org.Mm.eg.db")
requireNamespace("org.Hs.eg.db")
# BiocManager::install("GO.db")
requireNamespace("GO.db")
# https://www.bioconductor.org/packages/release/bioc/vignettes/goseq/inst/doc/goseq.pdf
# BiocManager::install("goseq")
# requireNamespace("goseq")
# install.packages("pracma")
requireNamespace("pracma")
# https://vroom.r-lib.org/articles/vroom.html
# install.packages("vroom")
requireNamespace("vroom")
# BiocManager::install("DESeq2")
requireNamespace("DESeq2")
# BiocManager::install("scater")
requireNamespace("scater")
# BiocManager::install("scran")
# requireNamespace("scran")
# BiocManager::install("igraph")
requireNamespace("igraph")
# install.packages("pheatmap")
requireNamespace("pheatmap")
# install.packages("gridExtra")
requireNamespace("gridExtra")
# install.packages("pathlibr")
requireNamespace("pathlibr")
# install.packages("stringr")
requireNamespace("stringr")
# install.packages("reshape2")
requireNamespace("reshape2")
# install.packages("pbapply")
# requireNamespace("pbapply")
# install.packages("Rtsne")
requireNamespace("Rtsne")
# install.packages("uwot")
requireNamespace("uwot")
# avoid importing `exprs` that leads to clashes
requireNamespace("rlang")
# install.packages("Seurat")
# requireNamespace("Seurat")
# BiocManager::install("TrajectoryUtils") # Didn't work
# devtools::install_github("LTLA/TrajectoryUtils@e943606")
# BiocManager::install("kstreet13/slingshot@25fc566")
# requireNamespace("slingshot")
# BiocManager::install("tradeSeq")
# requireNamespace("tradeSeq")
# install.packages("statmod")
# BiocManager::install("edgeR")
requireNamespace("edgeR")
# https://github.com/eliocamp/ggnewscale
# install.packages("ggnewscale")
# requireNamespace("ggnewscale")
# https://www.rdocumentation.org/packages/DGCA/versions/1.0.2
# BiocManager::install("impute")
# BiocManager::install("preprocessCore")
# BiocManager::install("GO.db")
# install.packages("WGCNA")
# install.packages("DGCA")
requireNamespace("DGCA")
# https://github.com/danielschw188/Revelio
# devtools::install_github('danielschw188/Revelio@e8e1492')
requireNamespace("Revelio")
# # install.packages("rgl")
# requireNamespace("rgl")
# install.packages("plotly")
requireNamespace("plotly")
# https://www.rdocumentation.org/packages/Hmisc/versions/4.5-0/topics/rcorr
# install.packages("Hmisc")
requireNamespace("Hmisc")
# BiocManager::install("multiGSEA")
# requireNamespace("multiGSEA")
# install.packages("tidygraph")
# install.packages("ggraph")
requireNamespace("ggraph")
requireNamespace("tidygraph")
# https://htmlpreview.github.io/?https://github.com/aertslab/SCENIC/blob/master/inst/doc/SCENIC_Setup.html
# BiocManager::install(c("AUCell", "RcisTarget"))
# BiocManager::install(c("GRNBoost"))
# BiocManager::install(c("doMC", "doRNG"))
# devtools::install_github("aertslab/SCENIC@0585e87")
# requireNamespace("SCENIC")
})
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/glue/libs/glue.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/xml2/libs/xml2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/colorspace/libs/colorspace.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/systemfonts/libs/systemfonts.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/svglite/libs/svglite.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bitops/libs/bitops.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RCurl/libs/RCurl.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fastmap/libs/fastmap.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/cachem/libs/cachem.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/vctrs/libs/vctrs.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/ellipsis/libs/ellipsis.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fansi/libs/fansi.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/utf8/libs/utf8.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tibble/libs/tibble.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/purrr/libs/purrr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/dplyr/libs/dplyr.so") ...
## now dyn.load("/usr/lib/R/library/grid/libs/grid.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Rcpp/libs/Rcpp.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/gridtext/libs/gridtext.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/ggrepel/libs/ggrepel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tidyr/libs/tidyr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bit/libs/bit.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/bit64/libs/bit64.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RSQLite/libs/RSQLite.so") ...
## now dyn.load("/usr/lib/R/library/parallel/libs/parallel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Biobase/libs/Biobase.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/S4Vectors/libs/S4Vectors.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/IRanges/libs/IRanges.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/png/libs/png.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/XVector/libs/XVector.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Biostrings/libs/Biostrings.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/vroom/libs/vroom.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tzdb/libs/tzdb.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocParallel/libs/BiocParallel.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/GenomicRanges/libs/GenomicRanges.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/matrixStats/libs/matrixStats.so") ...
## now dyn.load("/usr/lib/R/library/lattice/libs/lattice.so") ...
## now dyn.load("/usr/lib/R/library/Matrix/libs/Matrix.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/DelayedArray/libs/DelayedArray.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/XML/libs/XML.so") ...
## now dyn.load("/usr/lib/R/library/splines/libs/splines.so") ...
## now dyn.load("/usr/lib/R/library/survival/libs/survival.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/genefilter/libs/genefilter.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/locfit/libs/locfit.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/DESeq2/libs/DESeq2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/sparseMatrixStats/libs/sparseMatrixStats.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/beachmat/libs/beachmat.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/scuttle/libs/scuttle.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocNeighbors/libs/BiocNeighbors.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/irlba/libs/irlba.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/BiocSingular/libs/BiocSingular.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/beeswarm/libs/beeswarm.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/igraph/libs/igraph.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/plyr/libs/plyr.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/reshape2/libs/reshape2.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Rtsne/libs/Rtsne.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/uwot/libs/uwot.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/limma/libs/limma.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/edgeR/libs/edgeR.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/fastcluster/libs/fastcluster.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/base64enc/libs/base64enc.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/jpeg/libs/jpeg.so") ...
## now dyn.load("/usr/lib/R/library/cluster/libs/cluster.so") ...
## now dyn.load("/usr/lib/R/library/foreign/libs/foreign.so") ...
## now dyn.load("/usr/lib/R/library/nnet/libs/nnet.so") ...
## now dyn.load("/usr/lib/R/library/rpart/libs/rpart.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/checkmate/libs/checkmate.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/backports/libs/backports.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/data.table/libs/datatable.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/Hmisc/libs/Hmisc.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/impute/libs/impute.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/preprocessCore/libs/preprocessCore.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/WGCNA/libs/WGCNA.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/lazyeval/libs/lazyeval.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/tidygraph/libs/tidygraph.so") ...
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=lzh_TW
## [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=lzh_TW LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=lzh_TW LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=lzh_TW LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_3.3.5 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] backports_1.2.1 Hmisc_4.5-0
## [3] systemfonts_1.0.2 plyr_1.8.6
## [5] igraph_1.2.6 lazyeval_0.2.2
## [7] splines_4.1.0 Revelio_0.1.0
## [9] BiocParallel_1.26.1 GenomeInfoDb_1.28.1
## [11] scater_1.20.1 digest_0.6.27
## [13] foreach_1.5.1 htmltools_0.5.1.1
## [15] viridis_0.6.1 GO.db_3.13.0
## [17] fansi_0.5.0 checkmate_2.0.0
## [19] magrittr_2.0.1 memoise_2.0.0
## [21] ScaledMatrix_1.0.0 cluster_2.1.2
## [23] doParallel_1.0.16 tzdb_0.1.1
## [25] limma_3.48.1 fastcluster_1.2.3
## [27] Biostrings_2.60.1 annotate_1.70.0
## [29] matrixStats_0.59.0 vroom_1.5.1
## [31] svglite_2.0.0 jpeg_0.1-8.1
## [33] colorspace_2.0-2 blob_1.2.1
## [35] rvest_1.0.0 ggrepel_0.9.1
## [37] xfun_0.24 crayon_1.4.1
## [39] RCurl_1.98-1.3 jsonlite_1.7.2
## [41] org.Mm.eg.db_3.13.0 genefilter_1.74.0
## [43] impute_1.66.0 survival_3.2-11
## [45] iterators_1.0.13 glue_1.4.2
## [47] kableExtra_1.3.4 gtable_0.3.0
## [49] zlibbioc_1.38.0 XVector_0.32.0
## [51] webshot_0.5.2 DelayedArray_0.18.0
## [53] BiocSingular_1.8.1 SingleCellExperiment_1.14.1
## [55] BiocGenerics_0.38.0 scales_1.1.1
## [57] pheatmap_1.0.12 DBI_1.1.1
## [59] edgeR_3.34.0 Rcpp_1.0.6
## [61] htmlTable_2.2.1 viridisLite_0.4.0
## [63] xtable_1.8-4 gridtext_0.1.4
## [65] foreign_0.8-81 bit_4.0.4
## [67] rsvd_1.0.5 preprocessCore_1.54.0
## [69] Formula_1.2-4 stats4_4.1.0
## [71] DT_0.18 htmlwidgets_1.5.3
## [73] httr_1.4.2 RColorBrewer_1.1-2
## [75] ellipsis_0.3.2 pkgconfig_2.0.3
## [77] XML_3.99-0.6 scuttle_1.2.0
## [79] nnet_7.3-16 sass_0.4.0
## [81] uwot_0.1.10 locfit_1.5-9.4
## [83] utf8_1.2.1 dynamicTreeCut_1.63-1
## [85] tidyselect_1.1.1 rlang_0.4.11
## [87] reshape2_1.4.4 AnnotationDbi_1.54.1
## [89] munsell_0.5.0 tools_4.1.0
## [91] cachem_1.0.5 generics_0.1.0
## [93] RSQLite_2.2.7 evaluate_0.14
## [95] stringr_1.4.0 fastmap_1.1.0
## [97] yaml_2.2.1 org.Hs.eg.db_3.13.0
## [99] knitr_1.33 bit64_4.0.5
## [101] tidygraph_1.2.0 purrr_0.3.4
## [103] KEGGREST_1.32.0 sparseMatrixStats_1.4.0
## [105] pracma_2.3.3 xml2_1.3.2
## [107] compiler_4.1.0 rstudioapi_0.13
## [109] plotly_4.9.4.1 beeswarm_0.4.0
## [111] png_0.1-7 tibble_3.1.2
## [113] geneplotter_1.70.0 bslib_0.2.5.1
## [115] stringi_1.6.2 lattice_0.20-44
## [117] DGCA_1.0.2 Matrix_1.3-4
## [119] ggsci_2.9 vctrs_0.3.8
## [121] pillar_1.6.1 lifecycle_1.0.0
## [123] jquerylib_0.1.4 BiocNeighbors_1.10.0
## [125] data.table_1.14.0 bitops_1.0-7
## [127] irlba_2.3.3 GenomicRanges_1.44.0
## [129] latticeExtra_0.6-29 R6_2.5.0
## [131] gridExtra_2.3 vipor_0.4.5
## [133] IRanges_2.26.0 codetools_0.2-18
## [135] assertthat_0.2.1 SummarizedExperiment_1.22.0
## [137] DESeq2_1.32.0 withr_2.4.2
## [139] S4Vectors_0.30.0 GenomeInfoDbData_1.2.6
## [141] parallel_4.1.0 ggtext_0.1.1
## [143] rpart_4.1-15 grid_4.1.0
## [145] pathlibr_0.1.0 beachmat_2.8.0
## [147] tidyr_1.1.3 rmarkdown_2.9
## [149] DelayedMatrixStats_1.14.0 MatrixGenerics_1.4.0
## [151] Rtsne_0.15 Biobase_2.52.0
## [153] WGCNA_1.70-3 base64enc_0.1-3
## [155] ggbeeswarm_0.6.0
set.seed(43)
dual <- function(f, g) {
if (missing(g)) {
stopifnot(class(f) == "function")
(function(L) do.call(f, Filter(Negate(is.null), L)))
} else {
stopifnot(class(f) == "list")
stopifnot(class(g) == "function")
dual(g)(f)
}
}
Example 1:
list(
biggle = list(A = "hello", B = "world"),
wiggle = list(A = "Hello", B = "World", C = NULL)
) %>%
lapply(dual(function(B, A) paste(A, B)))
## $biggle
## [1] "hello world"
##
## $wiggle
## [1] "Hello World"
Example 2:
list(
A = data.frame(x = c("xa1", "xa2"), y = c("ya1", "ya2")),
B = data.frame(x = c("xb1", "xb2"), y = c("yb1", "yb2")),
C = data.frame(x = c("xc1", "xc2"), y = c("yc1", "yc2"))
) %>%
# No need for the brackets!
dual(cbind)
## A.x A.y B.x B.y C.x C.y
## 1 xa1 ya1 xb1 yb1 xc1 yc1
## 2 xa2 ya2 xb2 yb2 xc2 yc2
culprits <- function() {
search()[[1]] %>%
ls(name = .) %>%
sapply(. %>% get() %>% object.size() %>% "/"(1e6)) %>%
data.frame(size = .) %>%
arrange(size) %>%
mutate(cumsize = cumsum(size)) %>%
arrange(desc(size)) %>%
mutate(across(everything(), ~ signif(.x, digits = 2))) %>%
mutate(unit = "Mb")
}
Example:
culprits() %>% head(3)
## size cumsize unit
## dual 0.074 0.120 Mb
## glue 0.022 0.048 Mb
## culprits 0.011 0.026 Mb
for (i in 1:2) {
BASEPATH <- pathlibr::Path$new(Sys.glob(paste(c("work", ".."), "*FibroAct", sep = "/")))
setwd(BASEPATH$show)
}
print(paste("Work path:", BASEPATH))
## [1] "Work path: ../20210502-FibroAct"
stopifnot("main.Rmd" %in% names(BASEPATH$dir))
path_to <- (function(.) Sys.glob(BASEPATH$join("../..")$join(.)$show))
out_file <- (function(.) BASEPATH$join("output")$join(.)$show)
dir.create(out_file(""), showWarnings = FALSE)
memo <- function(f) {
memoise::memoise(
f,
cache = cachem::cache_disk(BASEPATH$join("main_cache/memoized")$show)
)
}
Basic table printing.
kable <- function(.) {
kableExtra::kbl(., align = "c") %>%
kableExtra::kable_paper("hover", full_width = FALSE)
}
Options for fancy datatable printing.
options(DT.options = list(
pageLength = 16,
language = list(search = "Filter:"),
scrollX = TRUE
))
Converting a symbol or GO ID to a link.
as.link <-
function(x) {
dplyr::case_when(
# If x is like GO:003453
startsWith(x, "GO:") ~ paste0(
"<a target='_blank'",
"href='", "http://amigo.geneontology.org/amigo/term/", x, "'",
">", x, "</a>"
),
# Otherwise try this
TRUE ~ paste0(
"<a target='_blank'",
"href='", "http://www.informatics.jax.org/marker/summary?nomen=", x, "'",
">", x, "</a>"
)
)
}
Example
data.frame(x = c("GO:00345", "Lama")) %>%
mutate(link = as.link(x)) %>%
DT::datatable(width = "100%")
Printing a sequence of tables
kable_all <- function(tt) {
for (t in tt) {
t %>%
table() %>%
t() %>%
kable() %>%
print()
}
}
# with results='asis'
list(
a = c(c(1:5), c(2:5), c(3:5)),
b = c(c(1:6), c(2:6), c(3:6)),
b = c(c(1:7), c(2:7), c(3:7)),
b = c(c(1:9), c(2:9), c(3:9))
) %>% kable_all()
| 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|
| 1 | 2 | 3 | 3 | 3 |
| 1 | 2 | 3 | 4 | 5 | 6 |
|---|---|---|---|---|---|
| 1 | 2 | 3 | 3 | 3 | 3 |
| 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|---|---|---|---|---|---|---|
| 1 | 2 | 3 | 3 | 3 | 3 | 3 |
| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|
| 1 | 2 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |
ggplot2::theme_set(theme_light(base_size = 15))
gglast <- function(what) {
p <- ggplot2::last_plot()
q <- p$mapping[[deparse(substitute(what))]]
rlang::eval_tidy(q, data = p$data)
}
# example: scale_fill_manual(values = color_map, breaks = gglast(fill))
# Converts an annotation column (e.g. sce$group)
# to a named vector of colors (group -> color)
# List of palettes: grDevices::hcl.pals()
# http://colorspace.r-forge.r-project.org/reference/hcl_palettes.html
ramp <- function(anno, palette = "Temps") {
anno %>%
as.character() %>%
unique() %>%
c(NA) %>%
data.frame(
item = .,
# https://developer.r-project.org/Blog/public/2019/04/01/hcl-based-color-palettes-in-grdevices/
color = length(.) %>% colorRampPalette(grDevices::hcl.colors(n = ., palette = palette))(.)
) %>%
tidyr::drop_na() %>%
pull(color, item)
}
Example:
ramp(c("Grp1", "Grp2", "Grp1"), palette = "Dark 3") %>%
t() %>%
kable()
| Grp1 | Grp2 |
|---|---|
| #E16A86 | #50A315 |
RGB <- function(arr, alpha = 1) {
stopifnot(ncol(arr) == 3)
arr %>%
as.data.frame() %>%
`/`(255) %>%
mutate(alpha = alpha) %>%
mutate(colors = apply(., 1, \(x) rgb(x[1], x[2], x[3], alpha = x[["alpha"]]))) %>%
pull(colors)
}
Example:
(1:7) %>%
percent_rank() %>%
(colorRamp(c("blue", "red"))) %>%
RGB(alpha = 0.5) %>%
t() %>%
kable()
| #0000FF80 | #2B00D580 | #5500AA80 | #80008080 | #AA005580 | #D5002A80 | #FF000080 |
Colors for consistency; may be overwritten below.
colors <- unlist(
c(
list(
`374_VLMC` = "aquamarine3",
`375_VLMC` = "chartreuse4",
`376_VLMC` = "chartreuse3",
`FB1` = "cornflowerblue",
`FB2` = "blue",
`Ctrl` = "chartreuse4",
`EAE` = "coral2",
`VLMC1` = "chartreuse1",
`VLMC3` = "coral3",
`VLMC4` = "coral4",
`healthy` = "green",
`EAE1` = "coral1",
`EAE2` = "coral3",
`healthy1` = "aquamarine",
`healthy2` = "chartreuse3",
`healthy3` = "chartreuse4",
`positive` = "Red4",
`negative` = "Steelblue1"
),
ramp(c("G1.S", "S", "G2", "G2.M", "M.G1"), palette = "Dark 3")
)
)
colors %>%
unlist() %>%
data.frame(c = .) %>%
mutate(item = rownames(.)) %>%
ggplot(aes(y = item, x = 1)) +
geom_tile(fill = colors, stat = "identity") +
scale_y_discrete(limits = rev(names(colors))) +
theme(
axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title = element_blank()
)
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/farver/libs/farver.so") ...

Zoomable figures.
///*
$(document).ready(
function() {
$("img.zoomable").on("click", function(evt) {
if (!evt.originalEvent.shiftKey && evt.originalEvent.altKey) {
var el = $(this).clone();
el.attr("width", "2048px");
window.open().document.write(el.wrap('<p>').parent().html());
return false;
}
});
}
);
//*/
Alt+click on a figure to zoom:
list(
randu %>% ggplot(aes(x = x, y = y)) +
geom_point() +
ggtitle("Zoomable 1"),
randu %>% ggplot(aes(x = x, y = z)) +
geom_point() +
ggtitle("Zoomable 2")
)


Click-scrollable figures (like slides):
$(document).ready(
function() {
$("img.scrollable").css({
'position': 'absolute', 'top': 0, 'left': 0, 'z-index': 100000,
});
var parent = $("img.scrollable").parent();
parent.css({'position': 'relative'});
parent.find('img:first').css({'position': 'relative'});
// associate a 'history' stack with each parent
parent.each(function(i, el) { $(el).data("stack", new Array()); })
$("img.scrollable").on("click", function(evt) {
if (evt.originalEvent.altKey) return true;
if (!evt.originalEvent.shiftKey) {
var last = $(this);
last.parent().data("stack").push(last);
if (last) last.css({'z-index': parseInt(last.css('z-index'), 10) - 1});
} else {
var last = $(this).parent().data("stack").pop();
if (last) last.css({'z-index': parseInt(last.css('z-index'), 10) + 1});
}
evt.stopPropagation();
return false;
});
}
);
Click a figure to slide:
list(
randu %>% ggplot(aes(x = x, y = y)) +
geom_point() +
ggtitle("Slide 1"),
randu %>% ggplot(aes(x = x, y = z)) +
geom_point() +
ggtitle("Slide 2")
)


# Use first column as index
by_col1 <- (function(.) tibble::column_to_rownames(., colnames(.)[1]))
# Use index as new column `name`
ind2col <- (function(., name) tibble::rownames_to_column(., var = name))
take_rows <- function(df, rows = NULL) {
if (!is.null(rows)) {
return(df[rows[rows %in% rownames(df)], , drop = FALSE])
} else {
return(df)
}
}
# For reading wide tables
Sys.setenv("VROOM_CONNECTION_SIZE" = 2**19)
from_tsv <- function(filename, sep = "\t") {
filename %>%
vroom::vroom(del = sep, progress = FALSE) %>%
suppressMessages() %>%
by_col1()
}
# Writes dataframe to tab-separated file
# Precede by `ind2col("name for index")` to write the row names
to_tsv <- function(df, fn, sep = "\t") {
vroom::vroom_write(df, out_file(fn), delim = sep)
}
# A small random sub-dataframe (intended for genes x samples)
shample <- function(df) {
df[
sample(rownames(df), min(nrow(df), 333)),
sample(colnames(df), min(ncol(df), 33))
]
}
# Constructs a `SingleCellExperiment` checking consistency of sample names
make_sce <- function(counts, coldata) {
# Check that samples in counts = genes x samples
# are ordered as in coldata = samples x metafields
stopifnot(assertthat::are_equal(colnames(counts), rownames(coldata)))
SingleCellExperiment::SingleCellExperiment(
assays = list(counts = counts), colData = coldata
)
}
mock_sce <- function(genes, ncells) {
sce <- scater::mockSCE(ngenes = length(genes), ncells = ncells)
sce <- make_sce(
sce@assays@data$counts %>% as.data.frame(row.names = genes),
sce@colData %>% as.data.frame() %>% rename(celltype = Mutation_Status)
)
sce
}
drop_empty <- function(sce) {
sce[, colSums(abs(sce@assays@data$counts)) != 0]
}
# Normalizes samplewise to sum(expr) = 1
# Consider using sce %>% scater::logNormCounts() %>% ...
norm1 <- function(sce) {
sce@assays@data$counts %<>%
t() %>%
{
(.) / (rowSums(.))
} %>%
t()
sce
}
# Example
mock_sce(genes = LETTERS[1:10], ncells = 3) %>%
take_rows(LETTERS[1:5]) %>%
drop_empty() %>%
norm1() %>%
scater::logNormCounts() %>%
scater::aggregateAcrossFeatures(ids = c("V", "C", "C", "C", "V"), average = TRUE)
## class: SingleCellExperiment
## dim: 2 3
## metadata(0):
## assays(1): counts
## rownames(2): C V
## rowData names(0):
## colnames(3): Cell_001 Cell_002 Cell_003
## colData names(4): celltype Cell_Cycle Treatment sizeFactor
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
taxid <- list(mm = 10090, hs = 9606)
| mm | hs |
|---|---|
| 10090 | 9606 |
symbol2geneid <- function(genes) {
data.frame(symbol = genes) %>%
merge(
y = org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
all.x = TRUE,
by = "symbol",
sort = FALSE # !
) %>%
pull(gene_id, symbol) %>%
`[`(genes) # restore the original order
}
c("Jun", "Junb", "Junc") %>%
data.frame(symbol = .) %>%
mutate(gene_id = symbol2geneid(symbol)) %>%
kable()
| symbol | gene_id |
|---|---|
| Jun | 16476 |
| Junb | 16477 |
| Junc | NA |
dealias <- function(genes, species = "Mm") {
# `alias2SymbolTable` returns of vector of the same length as the vector of aliases.
# If an alias maps to more than one symbol, lowest Entrez ID number is returned.
# If an alias can't be mapped, then NA is returned.
genes %>%
limma::alias2SymbolTable(species = species) %>%
{
is.na(.) %>% {
if (any(.)) {
warning(paste(
"Dealiasing with `limma` genes resulted in", sum(.), "NAs, e.g. at:",
paste(sample(genes[.], size = min(5, sum(.))), collapse = ", ")
))
}
}
# Keep old names instead of NA
. <- dplyr::coalesce(., genes)
stopifnot(all(!is.na(.)))
stopifnot(mean((.) == genes) > 0.7)
(.)
}
}
dealias_sce <- function(sce) {
rownames(sce) %<>% dealias()
sce
}
omnipath.in <-
path_to("*/*/*/omnipath_in.csv.gz") %>%
from_tsv() %>%
select(
source, source_genesymbol, target, target_genesymbol,
organism,
is_inhibition, is_stimulation
)
Preview:
omnipath.tf <-
path_to("*/*/*/omnipath_tf.csv.gz") %>%
from_tsv() %>%
select(
source, source_genesymbol, target, target_genesymbol,
organism,
is_inhibition, is_stimulation
)
Preview:
omnipath.lr <-
path_to("*/*/*/omnipath_lr.csv.gz") %>%
from_tsv()
Preview:
We use Omnipath to annotate genes with their role using the following function.
# For a vector of gene symbols returns
# a vector containing the role of each symbol
symbol2role <- function(symbol, organism = taxid$mm) {
f <- function(df) {
df %>% filter(organism == organism)
}
data.frame(
TF = if_else(symbol %in% f(omnipath.tf)$source_genesymbol, "TF", ""),
Tgt = if_else(symbol %in% f(omnipath.tf)$target_genesymbol, "Tgt", ""),
Lig = if_else(symbol %in% f(omnipath.lr)$source_genesymbol, "Lig", ""),
Rec = if_else(symbol %in% f(omnipath.lr)$target_genesymbol, "Rec", "")
) %>%
apply(MARGIN = 1, FUN = function(row) {
if (any(nzchar(row))) {
paste(row[nzchar(row)], collapse = "/")
} else {
"--"
}
})
}
For example:
data.frame(symbol = c("Jun", "Notch1", "Xyz")) %>%
mutate(role = symbol2role(symbol)) %>%
kable()
| symbol | role |
|---|---|
| Jun | TF/Tgt |
| Notch1 | Tgt/Lig/Rec |
| Xyz | – |
The following provides a gene name (description) and a longer explanation (summary).
gene.summary <-
taxid %>%
lapply(
function(i) {
path_to(paste0("*/NCBI/gene/*/", names(which(. == i)), ".tsv.gz")) %>%
from_tsv() %>%
ind2col("gene_id")
}
)
Preview:
The following function sets up a gene id/symbol homology table.
# homology(taxid$mm, taxid$hs) %>% head
# gives a dataframe like
# gene_id.y gene_id.x symbol.x symbol.y
# 1 1 117586 A1bg A1BG
# 2 2 232345 A2m A2M
# 3 9 17961 Nat2 NAT1
# 4 10 17960 Nat1 NAT2
# 5 12 16625 Serpina3c SERPINA3
# 6 13 67758 Aadac AADAC
homology <- function(taxid.x, taxid.y) {
# Assume that `gene_id` is unique
symbols <- rbind(
org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
org.Hs.eg.db::org.Hs.egSYMBOL2EG %>% as.data.frame()
)
path_to("*/MGI/*/vertebrate_homology.tsv*") %>%
from_tsv() %>%
select(gene_id = EntrezGene_ID, taxid = NCBI_Taxon_ID, hg = HomoloGene_ID) %>%
filter(taxid %in% c(taxid.x, taxid.y)) %>%
mutate(taxid = if_else(taxid == taxid.x, "x", "y")) %>%
stats::reshape(direction = "wide", idvar = "hg", timevar = "taxid") %>%
# "multiple rows match for taxid"
suppressWarnings() %>%
select(gene_id.x, gene_id.y) %>%
merge(y = symbols %>% select(gene_id.x = gene_id, symbol.x = symbol), by = "gene_id.x") %>%
merge(y = symbols %>% select(gene_id.y = gene_id, symbol.y = symbol), by = "gene_id.y")
}
For example:
homology(taxid$mm, taxid$hs) %>%
head(11) %>%
{
.[, order(colnames(.))]
} %>%
mutate(seems.resonable = (tolower(symbol.x) == tolower(symbol.y))) %>%
rbind("...") %>%
kable()
| gene_id.x | gene_id.y | symbol.x | symbol.y | seems.resonable |
|---|---|---|---|---|
| 117586 | 1 | A1bg | A1BG | TRUE |
| 232345 | 2 | A2m | A2M | TRUE |
| 17961 | 9 | Nat2 | NAT1 | FALSE |
| 17960 | 10 | Nat1 | NAT2 | FALSE |
| 16625 | 12 | Serpina3c | SERPINA3 | FALSE |
| 67758 | 13 | Aadac | AADAC | TRUE |
| 227290 | 14 | Aamp | AAMP | TRUE |
| 11298 | 15 | Aanat | AANAT | TRUE |
| 234734 | 16 | Aars | AARS1 | FALSE |
| 268860 | 18 | Abat | ABAT | TRUE |
| 11303 | 19 | Abca1 | ABCA1 | TRUE |
| … | … | … | … | … |
We use that to add description/summary to mouse genes by homology.
gene.summary$mm %<>%
merge(
x = .,
y = merge(
x = gene.summary$hs %>%
select(gene_id, summ = summary, desc = description) %>%
mutate(desc = if_else(is.na(desc), desc, paste("By homology:", desc))) %>%
mutate(summ = if_else(is.na(summ), summ, paste("By homology:", summ))),
y = path_to("*/MGI/*/vertebrate_homology.tsv*") %>%
from_tsv() %>%
filter(NCBI_Taxon_ID %in% taxid) %>%
select(gene_id = EntrezGene_ID, taxid = NCBI_Taxon_ID, hg = HomoloGene_ID) %>%
stats::reshape(direction = "wide", idvar = "hg", timevar = "taxid") %>%
select(gene_id = gene_id.9606, alt_gene_id = gene_id.10090),
by = "gene_id",
all = FALSE
) %>%
select(gene_id = alt_gene_id, alt_desc = desc, alt_summ = summ),
by = "gene_id",
all.x = TRUE,
all.y = FALSE,
sort = FALSE
) %>%
mutate(summary = if_else(!is.na(summary), summary, alt_summ)) %>%
mutate(description = if_else(!is.na(description), description, alt_desc)) %>%
select(-alt_desc, -alt_summ)
## Warning in reshapeWide(data, idvar = idvar, timevar = timevar, varying =
## varying, : multiple rows match for taxid=10090: first taken
## Warning in reshapeWide(data, idvar = idvar, timevar = timevar, varying =
## varying, : multiple rows match for taxid=9606: first taken
Preview:
goi <-
list(
neuroinflammation = list(
name = "Neuroinflammation",
genes = unique(c(
# "Early response" transcription factors: Fos, Fosb, Junb
# From 2018-Vanlandewijck-He-...-Betsholtz:
# "A molecular atlas of cell types and zonation in the brain vasculature"
c("Fos", "Fosb", "Junb", "Egr1"), # Jun is below
# Wikipathways Neuroinflammation:
# https://www.wikipathways.org/instance/WP4919_r111814
# Downloaded from
# http://www.gsea-msigdb.org/gsea/msigdb/cards/WP_NEUROINFLAMMATION.html
c("Ascc1", "Chuk", "Fos", "Jun", "Mapk14", "Mapk8", "mt-Co1", "mt-Co2", "Mtor", "Nfkbia", "Nos2", "Rela", "Tlr4")
))
),
# From 2018-Vanlandewijck-He-...-Betsholtz
# Pericyte markers:
# Pdgfrb, Cspg4, and Des
# SMC:
# Acta2 and Tagln
# Fibroblast markers:
# Pdgfra, Lum and Dcn
pvf = list(
name = "PVF",
genes = c(
# From 2021-Manberg-...-Lewandowski, p.643
"Spp1", "Col6a1", "Col1a1", "Mmp2",
# From 2018-Vanlandewijck-He-...-Betsholtz
"Pdgfra", "Lum", "Dcn"
)
),
# Collagen
col = list(
name = "Collagen",
genes = c(
"Col10a1", "Col11a1", "Col11a2", "Col12a1", "Col13a1", "Col14a1",
"Col15a1", "Col16a1", "Col17a1", "Col18a1", "Col19a1", "Col1a1",
"Col1a2", "Col20a1", "Col22a1", "Col23a1", "Col24a1", "Col25a1",
"Col26a1", "Col27a1", "Col28a1", "Col2a1", "Col3a1", "Col4a1",
"Col4a2", "Col4a3", "Col4a3bp", "Col4a4", "Col4a5", "Col4a6",
"Col5a1", "Col5a2", "Col5a3", "Col6a1", "Col6a2", "Col6a3",
"Col6a4", "Col6a5", "Col6a6", "Col7a1", "Col8a1", "Col8a2",
"Col9a1", "Col9a2", "Col9a3", "Colec10", "Colec11", "Colec12",
"Colgalt1", "Colgalt2", "Colq"
)
# Obtained using
# sce_dm %>% rownames() %>% { (.)[stringr::str_detect(., "^Col")] } %>% sort()
),
# Mitochondrial
mt = list(
name = "Mitochondrial",
genes = c(
"mt-Atp6", "mt-Atp8", "mt-Co1", "mt-Co2", "mt-Co3", "mt-Cytb",
"mt-Nd1", "mt-Nd2", "mt-Nd3", "mt-Nd4", "mt-Nd4l", "mt-Nd5", "mt-Nd6"
)
),
# Obtained using (e.g.)
# sce_dm %>% rownames() %>% { (.)[stringr::str_detect(., "^mt-")] } %>% sort()
air = list(
name = "Aerobic respiration",
genes = c(
# GO:1903715 (regulation of aerobic respiration)
# http://www.informatics.jax.org/vocab/gene_ontology/GO:1903715
c("Actn3", "Akt1", "Bnip3", "Cbfa2t3", "Hif1a", "Ide", "Nop53", "Shmt2", "Trpv4", "Vcp"),
# GO:0009060 (aerobic respiration) minus GO:1903715 and minus 'mt-Co3'
# http://www.informatics.jax.org/vocab/gene_ontology/GO:0009060
c("Aco1", "Aco2", "Adsl", "Afg1l", "Atp5f1d", "Bloc1s1", "Cat", "Cox10", "Cox4i1", "Cox4i2", "Cox5a", "Cox5b", "Cox6a1", "Cox6a2", "Cox7c", "Cs", "Csl", "Cycs", "Cyct", "Dhtkd1", "Dlat", "Dlst", "Fh", "Fxn", "Idh1", "Idh2", "Idh3a", "Idh3b", "Idh3g", "Ireb2", "Mdh1", "Mdh1b", "Mdh2", "Mtco1", "Mtfr1", "Mtfr1l", "Mtfr2", "Mtnd1", "Mtnd4", "Mup1", "Mup11", "Mup2", "Mup3", "Mup4", "Mup5", "Mup6", "Ndufs4", "Ndufs7", "Ndufs8", "Ogdh", "Ogdhl", "Oxa1l", "Pank2", "Pdha1", "Pdha2", "Pdhb", "Proca1", "Q8BPC6", "Sdha", "Sdhaf2", "Sdhb", "Sdhc", "Sdhd", "Sirt3", "Sucla2", "Suclg1", "Suclg2", "Ucn", "Uqcr10", "Uqcrb", "Uqcrh")
)
),
fib_mig = list(
name = "Reg. fib. mig.",
genes = c(
# GO:0010762 (regulation of fibroblast migration)
# http://www.informatics.jax.org/vocab/gene_ontology/GO:0010762
c("Acta2", "Actr3", "Adipor2", "Ager", "Akap12", "Akt1", "Appl1", "Appl2", "Arhgap4", "Bag4", "Braf", "Cln3", "Coro1c", "Cygb", "Ddr2", "Dmtn", "Fer", "Fgf2", "Fgfr1", "Gna12", "Has1", "Hyal2", "Itgb1", "Itgb1bp1", "Itgb3", "MACIR", "Mmp1a", "Mta2", "Pak1", "Pak3", "Prkce", "Prr5l", "Ptk2", "Rac1", "Rcc2", "Rffl", "Sdc4", "Slc8a1", "Tgfb1", "Thbs1", "Tsc2", "Uts2", "Wdpcp")
)
),
glycolysis = list(
name = "Anaerobic glycolysis",
genes = c(
# GO:0006096 (glycolytic process / anaerobic glycolysis)
# http://www.informatics.jax.org/vocab/gene_ontology/GO:0006096
c("Actn3", "Adpgk", "Aldoa", "Aldoart1", "Aldoart2", "Aldob", "Aldoc", "App", "Bpgm", "Cbfa2t3", "Ddit4", "Dhtkd1", "Eif6", "Eno1", "Eno2", "Eno3", "Eno4", "Entpd5", "Ep300", "Esrrb", "Fbp1", "Foxk1", "Foxk2", "Gale", "Galk1", "Galt", "Gapdh", "Gapdhs", "Gck", "Git1", "Gm10358", "Gm11214", "Gm12117", "Gm15294", "Gm3839", "Gpd1", "Gpi", "Hdac4", "Hif1a", "Hk1", "Hk2", "Hk3", "Hkdc1", "Htr2a", "Ier3", "Ifng", "Igf1", "Il3", "Ins2", "Insr", "Jmjd8", "Khk", "Mif", "Mlxipl", "Mpi", "Mtch2", "Myc", "Myog", "Ncor1", "Nupr1", "Ogdh", "Ogt", "P2rx7", "Pfkfb2", "Pfkl", "Pfkm", "Pfkp", "Pgam1", "Pgam2", "Pgk1", "Pgk2", "Pklr", "Pkm", "Ppara", "Ppargc1a", "Prkaa1", "Prkaa2", "Prkag2", "Prkag3", "Prxl2c", "Psen1", "Sirt6", "Slc2a6", "Slc4a1", "Slc4a4", "Stat3", "Tigar", "Tkfc", "Tpi1", "Trex1", "Zbtb20", "Zbtb7a")
)
),
# https://reactome.org/PathwayBrowser/#/R-MMU-877300&SEL=R-MMU-873829&DTAB=MT
# Motivated by https://www.nature.com/articles/s41593-020-00770-9
ifng_signaling = list(
name = "Interferon gamma signaling",
genes = c("Ifngr1", "Ifngr2", "Jak1", "Jak2", "Ifng")
),
eae_common_up = list(
name = "Up in EAE in CB & Dm",
genes = c("Wif1", "Flrt2", "Scgb1a1", "Tcim", "Pcdh19", "Ifi47", "Aqp1", "Npy1r", "Serpina3g", "Ak1", "Kng1", "Plac8", "Rai14", "Nup50", "Cd74", "Adam12", "Tnfaip2", "Cd44", "Zfp608", "Cldn11", "Chst2") %>% unique()
)
)
# Use a function because the list may change
get_goi_genes <- function() {
goi %>%
lapply(as.data.frame) %>%
dual(base::rbind) %>%
pull(genes) %>%
unique()
}
get_goi_genes()[1:7] %>%
c("...") %>%
t() %>%
kable()
| Fos | Fosb | Junb | Egr1 | Ascc1 | Chuk | Jun | … |
# Return a vector `goi_set` indicating the gene set for each `symbol`
symbol2goiset <- function(genes) {
df <- data.frame(symbol = genes, goi_set = "Other")
for (goi in goi) {
df %<>% mutate(goi_set = if_else(symbol %in% goi$genes, goi$name, goi_set))
}
(df %>% pull(goi_set, symbol))[genes]
}
show_gene_info_table <- function(df) {
df$symbols # need this column
df %>%
mutate(role = symbol2role(symbol)) %>%
mutate(goi_set = symbol2goiset(symbol)) %>%
mutate(gene_id = symbol2geneid(symbol)) %>%
merge(gene.summary$mm, all.x = TRUE, by = "gene_id", sort = FALSE) %>%
mutate(description = if_else(is.na(summary), description, paste0("<details>", "<summary>", description, "</summary>", summary, "</details>"))) %>%
mutate(description = paste0("<span style='font-size:70%'>", description, "</span>")) %>%
select(-summary) %>%
mutate(symbol = as.link(symbol)) %>%
DT::datatable(rownames = NULL, escape = FALSE, width = "100%")
}
get_goi_genes() %>%
sample(size = 3) %>%
data.frame(symbol = .) %>%
rbind("...") %>%
show_gene_info_table()
# TODO: normalize with edgeR?
# Plots the PCA, showing the expression of genes
show_goi_pca <- function(sce, genes, group.by = "celltype", colors = ramp(as.character(sce@colData[[group.by]]))) {
cbind(
# Reduced dim coordinates
sce@int_colData$reducedDims$PCA %>%
as.data.frame(row.names = colnames(sce)) %>%
select(x = PC1, y = PC2),
# Metadata
sce@colData %>%
as.data.frame(row.names = colnames(sce)) %>%
select(celltype = all_of(group.by)),
# Expression data for genes-of-interest
sce@assays@data$counts %>%
log1p() %>%
`[`(genes[genes %in% rownames(.)], ) %>%
t()
) %>%
# Make long table
reshape2::melt(
# Keep as separate columns:
id.vars = c("x", "y", "celltype"),
# These columns will be stacked, in a column with this name:
measure.vars = genes, variable.name = "symbol"
) %>%
# Plot
ggplot(aes(x = x, y = y, color = celltype)) +
geom_point(aes(size = value), alpha = 0.4) +
scale_color_manual(values = colors, breaks = gglast(colour)) +
# theme_void() +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
) +
labs(color = group.by, size = "Expression") +
facet_wrap(vars(symbol))
}
get_goi_genes() %>%
mock_sce(genes = ., ncells = 77) %>%
scater::logNormCounts() %>%
scater::runPCA() %>%
show_goi_pca(goi$pvf$genes, group.by = "Cell_Cycle", colors = list(`G0` = "Red", `G1` = "Blue", `G2M` = "Orange", `S` = "DarkGreen", `Dead` = "Black"))
## Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth =
## TRUE, : You're computing too large a percentage of total singular values, use a
## standard svd instead.

# Aggregates genes by `group.by` for each geneset from `goi`
goi_agg_by_group <- function(sce, group.by = "celltype") {
goi %>%
lapply(function(goi) {
(dual(rbind))(as.list(group.by) %>% lapply(function(group.by) {
if (any(goi$genes %in% rownames(sce))) {
cbind(
# Metadata
sce@colData %>%
as.data.frame(row.names = colnames(sce)) %>%
select(subgroup = all_of(group.by)),
# Expression data for genes-of-interest
sce %>%
scater::logNormCounts() %>%
SingleCellExperiment::logcounts() %>%
take_rows(goi$genes) %>%
t()
) %>%
# Aggregate by sample
group_by(subgroup) %>%
summarise(across(everything(), mean)) %>%
# Make long table
reshape2::melt(
# Keep as separate columns:
id.vars = c("subgroup"),
# Other columns will be stacked, in a column with this name:
variable.name = "symbol"
# A column `value` contains their values
) %>%
mutate(group = group.by) %>%
mutate(goi_set = goi$name)
}
}))
}) %>%
# Relabel the elements of the list
dual(rbind) %>%
split(.$goi_set)
# cf.
# https://stackoverflow.com/questions/33775239/emulate-split-with-dplyr-group-by-return-a-list-of-data-frames
}
get_goi_genes() %>%
mock_sce(genes = ., ncells = 77) %>%
goi_agg_by_group(group.by = "Treatment") %>%
lapply(head)
## $`Aerobic respiration`
## subgroup symbol value group goi_set
## air.1 treat1 Actn3 4.615360 Treatment Aerobic respiration
## air.2 treat2 Actn3 4.467249 Treatment Aerobic respiration
## air.3 treat1 Akt1 6.284252 Treatment Aerobic respiration
## air.4 treat2 Akt1 6.708739 Treatment Aerobic respiration
## air.5 treat1 Bnip3 4.032117 Treatment Aerobic respiration
## air.6 treat2 Bnip3 4.439017 Treatment Aerobic respiration
##
## $`Anaerobic glycolysis`
## subgroup symbol value group goi_set
## glycolysis.1 treat1 Actn3 4.615360 Treatment Anaerobic glycolysis
## glycolysis.2 treat2 Actn3 4.467249 Treatment Anaerobic glycolysis
## glycolysis.3 treat1 Adpgk 8.331362 Treatment Anaerobic glycolysis
## glycolysis.4 treat2 Adpgk 8.688921 Treatment Anaerobic glycolysis
## glycolysis.5 treat1 Aldoa 4.464087 Treatment Anaerobic glycolysis
## glycolysis.6 treat2 Aldoa 3.784999 Treatment Anaerobic glycolysis
##
## $Collagen
## subgroup symbol value group goi_set
## col.1 treat1 Col10a1 4.141112 Treatment Collagen
## col.2 treat2 Col10a1 4.704114 Treatment Collagen
## col.3 treat1 Col11a1 6.739482 Treatment Collagen
## col.4 treat2 Col11a1 6.668717 Treatment Collagen
## col.5 treat1 Col11a2 2.624552 Treatment Collagen
## col.6 treat2 Col11a2 1.915401 Treatment Collagen
##
## $`Interferon gamma signaling`
## subgroup symbol value group goi_set
## ifng_signaling.1 treat1 Ifngr1 6.636068 Treatment Interferon gamma signaling
## ifng_signaling.2 treat2 Ifngr1 6.383147 Treatment Interferon gamma signaling
## ifng_signaling.3 treat1 Ifngr2 5.723872 Treatment Interferon gamma signaling
## ifng_signaling.4 treat2 Ifngr2 4.471666 Treatment Interferon gamma signaling
## ifng_signaling.5 treat1 Jak1 4.723866 Treatment Interferon gamma signaling
## ifng_signaling.6 treat2 Jak1 5.283164 Treatment Interferon gamma signaling
##
## $Mitochondrial
## subgroup symbol value group goi_set
## mt.1 treat1 mt-Atp6 0.7678226 Treatment Mitochondrial
## mt.2 treat2 mt-Atp6 1.3244967 Treatment Mitochondrial
## mt.3 treat1 mt-Atp8 3.6142808 Treatment Mitochondrial
## mt.4 treat2 mt-Atp8 3.1191546 Treatment Mitochondrial
## mt.5 treat1 mt-Co1 6.1507717 Treatment Mitochondrial
## mt.6 treat2 mt-Co1 6.1374289 Treatment Mitochondrial
##
## $Neuroinflammation
## subgroup symbol value group goi_set
## neuroinflammation.1 treat1 Fos 9.165277 Treatment Neuroinflammation
## neuroinflammation.2 treat2 Fos 9.651745 Treatment Neuroinflammation
## neuroinflammation.3 treat1 Fosb 2.524046 Treatment Neuroinflammation
## neuroinflammation.4 treat2 Fosb 1.596128 Treatment Neuroinflammation
## neuroinflammation.5 treat1 Junb 3.356956 Treatment Neuroinflammation
## neuroinflammation.6 treat2 Junb 3.502692 Treatment Neuroinflammation
##
## $PVF
## subgroup symbol value group goi_set
## pvf.1 treat1 Spp1 4.709470 Treatment PVF
## pvf.2 treat2 Spp1 4.792833 Treatment PVF
## pvf.3 treat1 Col6a1 7.763346 Treatment PVF
## pvf.4 treat2 Col6a1 7.958577 Treatment PVF
## pvf.5 treat1 Col1a1 7.440287 Treatment PVF
## pvf.6 treat2 Col1a1 7.579705 Treatment PVF
##
## $`Reg. fib. mig.`
## subgroup symbol value group goi_set
## fib_mig.1 treat1 Acta2 7.822436 Treatment Reg. fib. mig.
## fib_mig.2 treat2 Acta2 7.412699 Treatment Reg. fib. mig.
## fib_mig.3 treat1 Actr3 2.130581 Treatment Reg. fib. mig.
## fib_mig.4 treat2 Actr3 1.641021 Treatment Reg. fib. mig.
## fib_mig.5 treat1 Adipor2 8.432863 Treatment Reg. fib. mig.
## fib_mig.6 treat2 Adipor2 8.130264 Treatment Reg. fib. mig.
##
## $`Up in EAE in CB & Dm`
## subgroup symbol value group goi_set
## eae_common_up.1 treat1 Wif1 8.519505 Treatment Up in EAE in CB & Dm
## eae_common_up.2 treat2 Wif1 8.328938 Treatment Up in EAE in CB & Dm
## eae_common_up.3 treat1 Flrt2 9.373898 Treatment Up in EAE in CB & Dm
## eae_common_up.4 treat2 Flrt2 9.674197 Treatment Up in EAE in CB & Dm
## eae_common_up.5 treat1 Scgb1a1 5.283624 Treatment Up in EAE in CB & Dm
## eae_common_up.6 treat2 Scgb1a1 4.359214 Treatment Up in EAE in CB & Dm
# For each cell, aggregate each gene set
# Returns a sample x geneset table (with additional metadata columns)
goi_agg_by_genes <- function(sce) {
cbind(
# Sample metadata
sce@colData %>%
as.data.frame() %>%
select(any_of(c("cluster", "celltype", "group", "subgroup", "Group"))),
# Mean expression for genes-of-interest
lapply(goi, function(goi) {
list() %>% within({
assign(
goi$name,
sce@assays@data$counts %>%
log1p() %>%
take_rows(goi$genes) %>%
colMeans()
)
})
}),
# Library size
data.frame(lib.size = sce@assays@data$counts %>% colSums()),
# Reduced dim coordinates
sce %>%
scater::logNormCounts() %>%
scater::runPCA() %>%
SingleCellExperiment::reducedDim("PCA") %>%
as.data.frame() %>%
select(x = PC1, y = PC2, z = PC3)
)
}
get_goi_genes() %>%
mock_sce(genes = ., ncells = 77) %>%
goi_agg_by_genes() %>%
head() %>%
DT::datatable(width = "100%", options = list(scrollX = TRUE))
## Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth =
## TRUE, : You're computing too large a percentage of total singular values, use a
## standard svd instead.
This is a set of differential genes in kidney fibroblast activation from Deng et al., 2021.
fb_genes_si7 <- path_to("*/Deng-Wei-2021/*/SI7.*.gz") %>% from_tsv()
# fb_genes_si8 <- path_to("*/Deng-Wei-2021/*/SI8.*.gz") %>% from_tsv()
show_tbl <- function(tbl) {
tbl %<>%
ind2col("symbol") %>%
mutate(symbol = as.link(symbol)) %>%
select(symbol, logFC)
tbl %>%
DT::datatable(escape = FALSE) %>%
DT::formatSignif(colnames(tbl %>% select(-symbol)), digits = 3)
}
The list of genes is from Dorrier-Aran-…-Daneman, 2021, Extended Data Fig. 4.
if (!("c0" %in% names(goi))) {
goi %<>%
c(
list(
`0` = c("Col1a2", "Bgn", "Col3a1", "Col8a1", "Igf1", "Ifitm1", "Col1a1", "Cfh", "Sparc", "B2m"),
`1` = c("Apod", "Trf", "Dcn", "Gsn", "Gpc3", "Tgfbi", "Ccl11", "Smoc2", "Lum", "Cst3"),
`2` = c("Lgals1", "Timp1", "Rpl41", "Rps18", "Rps12", "Col4a1", "Cxcl10"),
`3` = c("Junb", "Fosb", "Fos", "Ptn", "Igfbp2", "Klf4", "Clec3b", "Apoe", "Rbp4", "Vtn"),
`4` = c("Rbp1", "Fth1", "Aldh1a2", "Fn1", "Igfbp3", "Slc7a11", "Ptgds", "Timp3", "Igfbp5"),
`5` = c("Ptch1", "Igfbp6", "Tmsb4x", "S100a6", "Igfbp7", "Mgp", "Saa1"),
`6` = c("Itih5", "Spp1", "Klf2", "Ubb", "Jund", "Mt1"),
`7` = c("Rgs5", "Acta2", "Myl9", "Gm13889", "Tagln", "Tpm2", "Actb")
) %>%
stack() %>%
mutate(cluster = paste0("c", ind), name = paste0("Dm #", ind)) %>%
split(.$cluster) %>%
lapply(function(c) list(name = unique(c$name), genes = c$values))
)
}
heatmap <- function(sce, colors = NULL,
anno_col = NULL, anno_row = NULL,
...) {
# `colors` should be like list(`negative` = "Blue", `positive` = "Red", ...)
colors %<>% c(NULL) %>% unlist()
if (!is.null(anno_row)) {
stopifnot(assertthat::are_equal(rownames(sce), rownames(anno_row)))
}
if (!is.null(anno_col)) {
stopifnot(assertthat::are_equal(colnames(sce), rownames(anno_col)))
}
anno_col %<>%
list(
sce@colData %>%
as.data.frame() %>%
select(any_of(c("celltype", "subgroup", "Group", "cluster", "Treatment")))
) %>%
dual(cbind)
sce %>%
SingleCellExperiment::counts() %>%
log1p() %>%
{
# (.) - rowMeans(.)
# (.) / rowSums(.)
} %>%
pheatmap::pheatmap(
show_colnames = FALSE,
treeheight_row = 0,
treeheight_col = 0,
cluster_rows = FALSE,
fontsize = 6,
color = grDevices::colorRampPalette(c("black", "yellow"))(10),
annotation_col = anno_col,
annotation_row = anno_row,
annotation_colors = c(
# TODO: use blue-red for LFC
anno_row %>%
as.list() %>%
lapply(ramp),
anno_col %>%
as.list() %>%
lapply(function(x) {
if (all(x %in% names(colors))) {
colors[unique(as.character(x))] %>% unlist()
} else {
ramp(x)
}
})
),
...
)
}
show_dm_heatmap <- function(sce, anno_row = NULL, ...) {
dm_genes <-
goi[grep("c[0-9]", goi %>% names())] %>%
lapply(as.data.frame) %>%
dual(rbind) %>%
pull(name, genes)
if (!is.null(anno_row)) {
stopifnot(assertthat::are_equal(rownames(sce), rownames(anno_row)))
}
anno_row %<>%
take_rows(names(dm_genes))
sce %<>%
take_rows(names(dm_genes))
anno_row %<>%
list(data.frame(dm.cluster = dm_genes[rownames(sce)])) %>%
dual(cbind)
heatmap(sce, anno_row = anno_row, ...)
}
unlist(goi[grep("c[0-9]", goi %>% names())]) %>%
mock_sce(ncells = 77) %>%
{
(.)[, order((.)$celltype)]
} %>%
show_dm_heatmap(
colors = colors,
cluster_cols = FALSE,
anno_col = data.frame(x = ((1:ncol(.)) %% 2), row.names = colnames(.)),
anno_row = data.frame(y = ((1:nrow(.)) %% 2), row.names = rownames(.))
)

The following table shows all the “genes of interest”.
get_goi_genes() %>%
data.frame(symbol = .) %>%
show_gene_info_table()
Small dataset to run through quickly.
sce_dummy <- readRDS(path_to("*/*/sce_dummy.rds"))
# Made with:
# load_dataset$dm(size = 77) %>% keep_only_goi() %>% saveRDS(file = "sce_dummy.rds")
## class: SingleCellExperiment
## dim: 267 74
## metadata(0):
## assays(1): counts
## rownames(267): Fos Fosb ... Zbtb20 Zbtb7a
## rowData names(0):
## colnames(74): healthy2_AATCCAGCATAGTAAG EAE1_AAAGATGTCGGAAATA ...
## EAE1_CTGTTTAAGAGTCTGG EAE1_TTAGTTCTCTCTGAGA
## colData names(4): group subgroup cluster celltype
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
data.frame(
x = sce_dummy@assays@data$counts %>% colSums(),
subgroup = sce_dummy$subgroup
) %>%
ggplot(aes(x = x, color = subgroup)) +
scale_color_manual(values = colors[as.character(gglast(colour))]) +
geom_density()

scater_attach <- function(sce) {
scater::addPerCellQC(
x = sce,
subsets = list(Mito = grep("mt-", rownames(sce)))
) %>%
scater::logNormCounts() %>%
scater::runPCA(name = "PCA", ncomponents = 20) %>%
scater::runTSNE(perplexity = 10, dimred = "PCA") %>%
scater::runUMAP()
}
sce_dummy %<>% scater_attach()
## Read the 74 x 20 data matrix successfully!
## OpenMP is working. 1 threads.
## Using no_dims = 2, perplexity = 10.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 0.00 seconds (sparsity = 0.595690)!
## Learning embedding...
## Iteration 50: error is 78.068034 (50 iterations in 0.02 seconds)
## Iteration 100: error is 68.156823 (50 iterations in 0.02 seconds)
## Iteration 150: error is 63.090786 (50 iterations in 0.01 seconds)
## Iteration 200: error is 64.654885 (50 iterations in 0.02 seconds)
## Iteration 250: error is 59.937177 (50 iterations in 0.01 seconds)
## Iteration 300: error is 1.671723 (50 iterations in 0.01 seconds)
## Iteration 350: error is 1.252564 (50 iterations in 0.01 seconds)
## Iteration 400: error is 1.009886 (50 iterations in 0.01 seconds)
## Iteration 450: error is 0.757356 (50 iterations in 0.01 seconds)
## Iteration 500: error is 0.692289 (50 iterations in 0.01 seconds)
## Iteration 550: error is 0.654925 (50 iterations in 0.01 seconds)
## Iteration 600: error is 0.655375 (50 iterations in 0.01 seconds)
## Iteration 650: error is 0.645972 (50 iterations in 0.01 seconds)
## Iteration 700: error is 0.650949 (50 iterations in 0.01 seconds)
## Iteration 750: error is 0.648843 (50 iterations in 0.01 seconds)
## Iteration 800: error is 0.654552 (50 iterations in 0.01 seconds)
## Iteration 850: error is 0.648237 (50 iterations in 0.01 seconds)
## Iteration 900: error is 0.654290 (50 iterations in 0.01 seconds)
## Iteration 950: error is 0.650961 (50 iterations in 0.01 seconds)
## Iteration 1000: error is 0.648245 (50 iterations in 0.01 seconds)
## Fitting performed in 0.23 seconds.
## 23:19:10 UMAP embedding parameters a = 1.896 b = 0.8006
## 23:19:10 Read 74 rows and found 267 numeric columns
## 23:19:10 Reducing X column dimension to 50 via PCA
## 23:19:10 PCA: 50 components explained 95.41% variance
## 23:19:10 Using FNN for neighbor search, n_neighbors = 15
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/FNN/libs/FNN.so") ...
## 23:19:11 Commencing smooth kNN distance calibration using 4 threads
## 23:19:13 Initializing from normalized Laplacian + noise
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/RSpectra/libs/RSpectra.so") ...
## 23:19:13 Commencing optimization for 500 epochs, with 1758 positive edges
## 23:19:15 Optimization finished
sce_dummy %>%
scater::makePerCellDF() %>%
head(3) %>%
DT::datatable(width = "100%")
scater_show <- function(sce, color.by = "celltype", colors = NULL) {
items <- sce@colData[[color.by]]
colors <-
if (all(items %in% names(colors))) {
colors[unique(as.character(items))]
} else {
ramp(items, palette = "Set 2")
}
sce %>%
{
function(which_dimred) {
suppressMessages(
scater::plotReducedDim(
dimred = which_dimred,
object = (.),
colour_by = color.by
) +
scale_color_manual(values = colors) + # no need for `breaks`
labs(color = color.by) +
guides(color = guide_legend(override.aes = list(size = 2, alpha = 1))) +
theme(
axis.text = element_blank(),
axis.ticks = element_blank()
)
)
}
} %>%
{
gridExtra::arrangeGrob((.)("PCA"), (.)("TSNE"), (.)("UMAP"), nrow = 3)
}
}
sce_dummy %>%
scater_show(color.by = "subgroup", colors = colors) %>%
gridExtra::grid.arrange()

Prototype, not for direct use.
pca3d_template <- function(sce, color.by = "celltype", colors = ramp(sce@colData[[color.by]])) {
data.frame(
color_group = sce@colData[[color.by]],
lib.size = sce@assays@data$counts %>% colSums(),
SingleCellExperiment::reducedDim(sce, "PCA") %>%
as.data.frame() %>%
select(PC1, PC2, PC3)
) %>%
group_by(color_group) %>%
sample(size = 30, replace = TRUE) %>%
ungroup() %>%
# print() %>%
plotly::plot_ly() %>%
plotly::add_markers(
x = ~PC1, y = ~PC2, z = ~PC3,
marker = list(size = 2, opacity = 1),
color = ~color_group,
colors = colors
) %>%
plotly::layout(
scene = list(
xaxis = list(title = "PC1", showticklabels = FALSE),
yaxis = list(title = "PC2", showticklabels = FALSE),
zaxis = list(title = "PC3", showticklabels = FALSE)
)
)
}
sce_dummy %>% pca3d_template(color.by = "subgroup", colors = colors)
default_cyclic_genes <-
# Convert the "cyclic genes" of `Revelio` from Hs to Mm
Revelio::revelioTestData_cyclicGenes %>%
lapply(function(genes) {
merge(
x = data.frame(symbol.x = genes),
y = homology(taxid$hs, taxid$mm),
by = "symbol.x",
all.x = TRUE
) %>%
pull(symbol.y) %>%
sort(na.last = TRUE)
})
default_cyclic_genes %>%
as.data.frame() %>%
head() %>%
rbind("...") %>%
kable()
| G1.S | S | G2 | G2.M | M.G1 |
|---|---|---|---|---|
| Abca7 | a | 1700001L19Rik | Adh4 | 1700102P08Rik |
| Acd | Abcc2 | 1700066M21Rik | Ahi1 | 2700049A03Rik |
| Acyp1 | Abcc5 | Alkbh1 | Akirin2 | Afap1 |
| Adamts1 | Abhd10 | Anln | Ankrd40 | Agfg1 |
| Adck2 | Adam22 | Ap3d1 | Anln | Agpat3 |
| Adcy6 | Arhgap42 | Arhgap19 | Arhgap19 | Akap13 |
| … | … | … | … | … |
The DC1-DC2 plane is supposed to show a circle reflecting the cell cycle (cf. Fig 1 in Schwabe et al., 2020).
cell_cycle <- function(sce, batch.by = NA, cyclic_genes = default_cyclic_genes) {
list(
counts = sce@assays@data$counts,
orig.id = colnames(sce),
batch = if (is.na(batch.by)) "cell" else paste0(sce@colData[[batch.by]], "_")
) %>%
within({
colnames(counts) <- paste0(batch, 1:ncol(counts))
orig.id <- data.frame(old = orig.id, new = colnames(counts)) %>% pull(old, new)
}) %>%
with({
counts %>%
Revelio::createRevelioObject(cyclicGenes = cyclic_genes, lowernGeneCutoff = 0) %>%
Revelio::getCellCyclePhaseAssignInformation() %>%
Revelio::getPCAData() %>%
Revelio::getOptimalRotation() %>%
list(cyclic = .) %>%
with({
assertthat::assert_that(all(
rownames(cyclic@cellInfo) == colnames(cyclic@transformedData$pca$data)
))
cbind(
cyclic@transformedData$pca$data %>% `[`(1:min(5, nrow(.)), ) %>% t(),
cyclic@transformedData$dc$data %>% `[`(1:min(5, nrow(.)), ) %>% t(),
cyclic@cellInfo
)
}) %>%
data.frame(row.names = orig.id[rownames(.)])
}) %>%
cbind(as.data.frame(sce@colData)[rownames(.), , drop = FALSE])
}
cell_cycle_plot2d <- function(cco, colors = ramp(cco$ccPhase)) {
cco %>%
ggplot(aes(x = DC1, y = DC2, color = ccPhase)) +
geom_point(size = 3, stroke = 1) +
scale_color_manual(values = colors, breaks = gglast(colour))
}
cco_revelio <-
cell_cycle(
Revelio::revelioTestData_rawDataMatrix %>%
make_sce(
counts = .,
coldata = data.frame(
x = colnames(.) %>% strsplit("_") %>% dual(rbind),
row.names = colnames(.)
) %>% select(batch = x.1)
),
batch.by = "batch",
cyclic_genes = Revelio::revelioTestData_cyclicGenes
)
## 2021-07-08 20:11:07: calculating optimal rotation: 2021-07-08 20:11:07: calculating PCA: 2021-07-08 20:11:07: assigning cell cycle phases: 2021-07-08 20:11:07: reading data: 5.73secs
## 31.71secs
## 58.25secs
## 1.51mins
cco_revelio %>%
ggplot(aes(x = DC1, y = DC2, color = ccPhase, shape = batch)) +
geom_point(size = 3, stroke = 1) +
scale_color_manual(values = colors, breaks = gglast(colour))

cco_mock <-
unlist(as.list(default_cyclic_genes)) %>%
{
(.)[!is.na(.)]
} %>%
mock_sce(ncells = 33) %>%
cell_cycle()
## 2021-07-08 20:12:39: calculating optimal rotation: 2021-07-08 20:12:39: calculating PCA: 2021-07-08 20:12:39: assigning cell cycle phases: 2021-07-08 20:12:39: reading data: 0.01secs
## 0.54secs
## 0.62secs
## 1.06mins
cco_mock %>%
ggplot(aes(x = DC1, y = DC2, color = celltype, fill = ccPhase)) +
geom_point(shape = 21, size = 5) +
ggplot2::scale_color_manual(values = colors, breaks = gglast(colour)) +
ggplot2::scale_fill_manual(values = colors, breaks = gglast(fill))

cell_cycle_plot3d <- function(cco, colors = ramp(cco$ccPhase)) {
cco %>%
with({
plotly::plot_ly() %>%
plotly::add_markers(
x = DC1, y = DC2, z = DC3,
size = 4,
color = ccPhase,
colors = colors
)
})
}
cco_mock %>% cell_cycle_plot3d()
cco_mock %>%
with({
rgl::plot3d(DC1, DC2, DC3, size = 3, type = "s", col = ramp(ccPhase)[ccPhase])
rgl::rglwidget()
})
deseq <- function(sce, design) {
sce %>%
DESeq2::DESeqDataSet(design = design) %>%
DESeq2::estimateSizeFactors(type = "poscounts") %>%
DESeq2::DESeq(quiet = TRUE, fitType = "local") %>%
DESeq2::results() %>%
as.data.frame() %>%
select(base = baseMean, lfc = log2FoldChange, p = pvalue) %>%
mutate(base = log1p(base)) %>%
tidyr::drop_na() %>%
mutate(p = p.adjust(p, "BH"), p.unadj = p) %>%
ind2col("symbol") %>%
list(table = ., design = design)
}
edger <- function(sce, design) {
sce %>%
edgeR::estimateDisp(robust = TRUE) %>%
# edgeR::calcNormFactors(method = "TMM") %>%
# edgeR::estimateGLMTagwiseDisp(design = design) %>%
edgeR::glmFit(design = model.matrix(design, data = sce@colData)) %>%
edgeR::glmLRT(coef = 2) %>%
`$`(table) %>%
select(base = logCPM, lfc = logFC, p = PValue, LR = LR) %>%
tidyr::drop_na() %>%
mutate(p = p.adjust(p, "BH"), p.unadj = p) %>%
ind2col("symbol") %>%
list(table = ., design = design)
}
# Hacky fctn to determine colors for `lfc > 0` and `lfc < 0`
lfc_colors <- function(sce, design, colors) {
f <- terms(design) %>% attr("factors")
stopifnot((nrow(f) == 1) && (ncol(f) == 1))
f <- sce@colData[[f %>% colnames()]]
cc <- colors[as.character(f %>% levels())] # TODO: need `as.character`?
cc <- c(cc[[1]], (rowMeans(col2rgb(cc[-1])) / 255) %>% as.list() %>% dual(rgb))
(. %>% (colorRamp(cc)) %>% RGB())
}
compute_de <- function(sce, design) {
list(
deseq = suppressMessages(deseq(sce, design)),
edger = suppressMessages(edger(sce, design))
) %>%
with({
list(
table = rbind(
deseq$table %>%
select(symbol, base, lfc, p) %>%
mutate(src = "DESeq2"),
edger$table %>%
select(symbol, base, lfc, p) %>%
mutate(src = "edgeR")
),
design = design
)
}) %>%
within({
# Attach LFC color (not very important)
table %<>%
group_by(src) %>%
mutate(color = (lfc_colors(sce, design, colors))(percent_rank(lfc))) %>%
ungroup()
})
}
sce_dummy.de <- sce_dummy %>% compute_de(~group)
sce_dummy.de$table %>%
head() %>%
rbind("...") %>%
kable()
| symbol | base | lfc | p | src | color |
|---|---|---|---|---|---|
| Fos | 1.94433786752507 | 1.03952791369199 | 0.0350644319817966 | DESeq2 | #C98144FF |
| Fosb | 1.32188433643544 | 0.392699195334891 | 0.94357785439112 | DESeq2 | #A09B36FF |
| Junb | 1.71655787188708 | 0.618371363877933 | 0.403770457436529 | DESeq2 | #B0913BFF |
| Spp1 | 0.685515786871741 | 1.75726212594778 | 0.121724809299487 | DESeq2 | #E4704DFF |
| Col6a1 | 0.644138066638724 | 0.767376344078733 | 0.324044869548652 | DESeq2 | #B68D3DFF |
| Col1a1 | 2.86899094589606 | 1.87748518409641 | 1.75462356248164e-09 | DESeq2 | #E86E4EFF |
| … | … | … | … | … | … |
show_de_table <- function(.) {
(.) %>%
group_by(src) %>%
slice_min(p, n = 777) %>%
slice_max(abs(lfc), n = 777) %>%
ungroup() %>%
merge(
y = org.Mm.eg.db::org.Mm.egSYMBOL2EG %>% as.data.frame(),
all.x = TRUE,
by = "symbol"
) %>%
merge(
y = gene.summary$mm %>% select(gene_id, description, summary),
all.x = TRUE,
by = "gene_id"
) %>%
select(-any_of("gene_id")) %>%
select(-any_of("color")) %>%
# Role: TF, Rec, Lig, etc
mutate(role = symbol2role(symbol)) %>%
mutate(symbol = as.link(symbol)) %>%
mutate(description = if_else(is.na(summary), description, paste0("<details>", "<summary>", description, "</summary>", summary, "</details>"))) %>%
select(-summary) %>%
mutate(description = paste0("<span style='font-size: 70%;'>", description, "</span>")) %>%
DT::datatable(escape = FALSE) %>%
DT::formatSignif(c("base", "lfc", "p"), digits = 3)
}
sce_dummy.de$table %>%
group_by(src) %>%
slice_min(p, n = 33) %>%
ungroup() %>%
show_de_table()
show_de_heatmap <- function(sce, de_table, n = 33, ...) {
top <- de_table %>%
group_by(lfc > 0) %>%
slice_min(p, n = n, with_ties = FALSE) %>%
ungroup() %>%
arrange(lfc) %>%
select(symbol, lfc, p) %>%
mutate(`-log10(p)` = -log10(p)) %>%
by_col1() %>%
`[`(rownames(.) %in% rownames(sce), )
sce[rownames(top), ] %>%
heatmap(anno_row = top, ...)
}
sce_dummy %>%
show_de_heatmap(sce_dummy.de$table %>% filter(src == "edgeR"), colors = colors)

show_de_volcano <- function(de_table) {
de_table %<>%
mutate(goi_set = symbol2goiset(symbol)) %>%
group_by(src) %>%
arrange(lfc) %>%
mutate(is_out = (lfc <= max(head(lfc, 5))) | (min(tail(lfc, 5)) <= lfc)) %>%
ungroup() %>%
mutate(label = ifelse(is_out, symbol, ""))
ggplot(de_table, aes(x = lfc, y = -log10(p))) +
# All genes
geom_point(color = "gray") +
# Color by geneset
geom_point(
data = de_table %>% filter(goi_set != "Other"),
aes(x = lfc, y = -log10(p), color = goi_set)
) +
scale_color_manual(values = ramp(de_table$goi_set)) +
labs(color = "Gene set") +
# Label outliers
#geom_text(aes(label = label), hjust = -0.3, position = position_jitter(h = 4), size = 3, color = "blue") +
ggrepel::geom_text_repel(
aes(label = label),
hjust = -0.3, size = 2, alpha = 1, show.legend = FALSE,
max.overlaps = Inf,
point.size = NA,
color = "blue",
segment.alpha = 0.2
) +
facet_grid(. ~ src, scales = "free") +
theme(legend.position = "bottom")
}
sce_dummy.de$table %>% show_de_volcano()

show_de_basecamp <- function(.) {
(.) %>%
mutate(goi_set = symbol2goiset(symbol)) %>%
group_by(goi_set) %>%
arrange(lfc) %>%
mutate(is_out = (lfc <= max(head(lfc, 5))) | (min(tail(lfc, 5)) <= lfc)) %>%
mutate(is_out = is_out | (min(tail(sort(base), 5)) <= base)) %>%
ungroup() %>%
mutate(label = ifelse(is_out, symbol, "")) %>%
ggplot(aes(x = lfc, y = base)) +
geom_point(data = (.), alpha = 0.5, size = 1, color = "grey") +
geom_point(color = "black", size = 0.5, alpha = 0.3) +
scale_y_log10() +
# geom_text(aes(label = label), hjust = -0.3, position = position_jitter(h = 0.01), size = 2, color = "blue") +
ggrepel::geom_text_repel(
aes(label = label),
hjust = -0.3, size = 2, show.legend = FALSE,
max.overlaps = Inf, point.size = NA,
segment.alpha = 0.2,
color = "blue"
) +
facet_wrap(~goi_set, scale = "free_y") +
# Note: put labs(x = "..") outside
labs(y = "Expression") +
theme(
legend.position = "bottom",
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
)
}
sce_dummy.de$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Condition1 <-- LFC --> Condition2")

show_de_cmp_lfc <- function(table) {
merge(
table %>% filter(src == "DESeq2"),
table %>% filter(src == "edgeR"),
by = "symbol",
all = TRUE
) %>%
# tidyr::replace_na(list(p.x = 0, p.y = 0)) %>%
tidyr::drop_na() %>%
mutate(sig.x = (p.x < quantile(p.x, 0.01))) %>%
mutate(sig.y = (p.y < quantile(p.y, 0.01))) %>%
mutate(sig = if_else(sig.x & sig.y, "Intersection", if_else(sig.x, "DESeq", if_else(sig.y, "edgeR", "")))) %>%
mutate(label = if_else(sig.x | sig.y, symbol, "")) %>%
{
ggplot((.) %>% filter(sig != ""), aes(x = lfc.x, y = lfc.y)) +
geom_point(data = (.) %>% filter(sig == ""), stroke = 0, alpha = 0.2) +
geom_point(aes(color = sig)) +
labs(x = "LFC by DESeq2", y = "LFC by edgeR", color = "Top 1% signif.") +
#geom_text(aes(label = label, color = sig), hjust = -0.3, position = position_jitter(h = 0.05), size = 3, alpha = 0.8, show.legend = FALSE) +
ggrepel::geom_text_repel(
aes(label = label, color = sig),
hjust = -0.3, size = 2, alpha = 0.8, show.legend = FALSE,
max.overlaps = 20,
point.size = NA,
segment.alpha = 0.2
) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed")
}
}
sce_dummy.de$table %>% show_de_cmp_lfc()

compute_go <- function(de_table, n = 33) {
de_table %>%
mutate(direction = if_else(lfc > 0, "up", "dn")) %>%
group_by(direction) %>%
slice_min(p, n = n, with_ties = TRUE) %>%
slice_max(abs(lfc), n = n, with_ties = FALSE) %>%
split(.$direction) %>%
within({
f <- (function(x) x$symbol[1:5] %>% paste(collapse = ", "))
message(paste("Up:", f(up), "..."))
message(paste("Dn:", f(dn), "..."))
rm(f)
}) %>%
lapply(function(de_table) {
de_table %>%
# Get the Entrez IDs of our genes
mutate(gene_id = symbol2geneid(symbol)) %>%
pull(gene_id) %>%
# GO enrichment analysis
limma::goana(species = "Mm") %>%
ind2col("category") %>%
rename(p = P.DE, `#DE` = DE, `#cat` = N, GO = Ont, term = Term) %>%
# Limit selection
filter(p <= 0.05) %>%
arrange(p)
})
}
show_go_table <- function(.) {
(.) %>%
with({
rbind(
up %>% mutate(direction = "lfc_up"),
dn %>% mutate(direction = "lfc_down")
) %>%
arrange(p)
}) %>%
mutate(category = if_else(grepl("^GO:", category), as.link(category), category)) %>%
mutate(category = paste0("<span style='white-space:nowrap;'>", category, "(", GO, ")", "</span>")) %>%
mutate(direction = if_else(grepl("lfc_u", direction), paste0("<span style='color:darkred'>", direction, "</span>"), direction)) %>%
mutate(direction = if_else(grepl("lfc_d", direction), paste0("<span style='color:darkgreen'>", direction, "</span>"), direction)) %>%
dplyr::select(any_of(c("category", "direction", "p", "#DE", "#cat", "term"))) %>%
DT::datatable(escape = FALSE, width = "100%") %>%
DT::formatSignif("p", digits = 3)
}
sce_dummy.go <-
sce_dummy.de$table %>%
compute_go()
## Up: Col4a2, Col4a1, Ier3, Col18a1, Col8a1 ...
## Dn: Col9a2, Pdhb, Col9a2, Ddit4, Col4a6 ...
rbind(
sce_dummy.go$up %>% head(2),
sce_dummy.go$dn %>% head(2),
"..."
) %>%
DT::datatable(escape = FALSE, width = "100%")
sce_dummy.go %>% show_go_table()
go_info <- list(
tax = list(mm = path_to("*/*/*/*_mouse_sym2cat.txt.gz") %>% read.csv(sep = "\t")),
obo = path_to("*/*/*/obo_go.txt.gz") %>% read.csv(sep = "\t")
)
go_info$tax$mm %>%
head() %>%
kable()
| symbol | goid |
|---|---|
| 0610009B22Rik | GO:0003674 |
| 0610009B22Rik | GO:0006888 |
| 0610009B22Rik | GO:0048471 |
| 0610009B22Rik | GO:0005634 |
| 0610009B22Rik | GO:0005737 |
| 0610009B22Rik | GO:0030008 |
go_info$obo %>%
head() %>%
kable()
| goid | term | namespace |
|---|---|---|
| GO:0000001 | mitochondrion inheritance | biological_process |
| GO:0000002 | mitochondrial genome maintenance | biological_process |
| GO:0000003 | reproduction | biological_process |
| GO:0000006 | high-affinity zinc transmembrane transporter activity | molecular_function |
| GO:0000007 | low-affinity zinc ion transmembrane transporter activity | molecular_function |
| GO:0000009 | alpha-1,6-mannosyltransferase activity | molecular_function |
go2symbols <- function(go_term) {
with(
go_info,
merge(tax$mm, filter(obo, stringr::str_detect(term, go_term)), by = "goid") %>%
pull(symbol) %>%
unique()
)
}
# example: "inflammatory"
list(
sce = sce_dummy,
genes = go2symbols("infla"),
table = sce_dummy.de$table %>% filter(src == "edgeR")
) %>%
with(
sce %>%
.[, order(.$subgroup)] %>%
show_de_heatmap(table %>% filter(symbol %in% genes), colors = colors, cluster_cols = FALSE)
)

# If A and B are SingleCellExperiment-like, then `cosine` computes
# the cosine similarity of samples of A against samples of B
cosine <- function(A, B) {
A <- A@assays@data$counts
B <- B@assays@data$counts
ii <- intersect(rownames(A), rownames(B))
A <- t(as.matrix(A[ii, , drop = FALSE]))
B <- t(as.matrix(B[ii, , drop = FALSE]))
A <- A / sqrt(rowSums(A * A))
B <- B / sqrt(rowSums(B * B))
A %*% t(B)
}
Example:
cosine(
sce_dummy[, sce_dummy$subgroup == "healthy1"],
sce_dummy[, sce_dummy$subgroup == "healthy1"]
) %>%
pheatmap::pheatmap()

coex <- function(sce) {
# Filter for the correlation matrix
f <- function(.) ((.) != 0)
sce %>%
SingleCellExperiment::counts() %>%
t() %>%
# stats::cor(method = "spearman") %>%
Hmisc::rcorr(type = "spearman") %>%
`$`(r) %>%
{
with(
(upper.tri(.) * (.)) %>% f() %>% which(arr.ind = TRUE) %>% as.data.frame(),
data.frame(a = rownames(.)[row], b = colnames(.)[col], w = (.)[cbind(row, col)])
)
}
}
show_coex_graph <- function(coex_df, de_table, n = 77) {
coex_df %<>% as.data.frame()
# Note: the graph vertices are pulled from the de_table
# Only those occurring in coex_df$a or .$b are retained
stopifnot(!any(duplicated(de_table$symbol)))
# Color of vertex labels
# de_table %>% pull(color, symbol)
stopifnot(all(c("a", "b", "w") %in% colnames(coex_df)))
# Symbol -> LFC map
node_lfc <- de_table %>% pull(lfc, symbol)
# Attach edge color
coex_df %<>%
group_by(w > 0) %>%
mutate(color = ((1 + w / max(abs(w))) / 2)) %>%
ungroup() %>%
mutate(color = colorRamp(c("blue", "yellow", "red"))(color) %>% RGB(alpha = 0.2))
# Co-expression graph
ex_graph <-
coex_df %>%
# Take top `n` co-expression edges at both extremes
group_by(w > 0) %>%
slice_max(abs(w), n = n) %>%
ungroup() %>%
select(a, b, w, color) %>%
igraph::graph_from_data_frame(
directed = FALSE,
vertices = (
de_table %>%
filter((symbol %in% coex_df$a) | (symbol %in% coex_df$b)) %>%
select(name = symbol, color)
)
)
graphs <- list(
# Proto-graphs based on OmniPath
tf = omnipath.tf,
lr = omnipath.lr,
ia = omnipath.in %>%
anti_join(omnipath.tf, by = c("source", "target")) %>%
anti_join(omnipath.lr, by = c("source", "target"))
) %>%
# Convert them to `igraph` objects
lapply(function(df) {
df %>%
mutate(W = is_stimulation - is_inhibition) %>%
select(a = source_genesymbol, b = target_genesymbol, W) %>%
filter(a %in% igraph::V(ex_graph)$name) %>%
filter(b %in% igraph::V(ex_graph)$name) %>%
group_by(a, b) %>%
summarize(W = sum(W), .groups = "keep") %>%
filter(W != 0) %>%
# Attach co-expression `w` and edge color
merge(coex_df %>% select(a, b, w, color), by = c("a", "b")) %>%
# Ensure correct order
select(a, b, w, W, color) %>%
igraph::graph_from_data_frame(directed = TRUE)
}) %>%
within({
# Attach the co-expression graph
ex <- ex_graph
# For the co-expression graph, by definition:
# `W` is positive iff both LFC are of the same sign
edges <- ex %>% igraph::as_data_frame("edges")
igraph::E(ex)$W <- sign(node_lfc[edges$from] * node_lfc[edges$to])
rm(edges)
}) %>%
lapply(function(g) {
igraph::E(g)$coherent <- (sign(igraph::E(g)$w) == sign(igraph::E(g)$W))
return(g)
})
rm(ex_graph)
# for layouting
set.seed(41)
graph_layout <-
(
graphs$ex
# + igraph::as.undirected(graphs$tf)
# + igraph::as.undirected(graphs$lr)
+ igraph::as.undirected(graphs$ia)
) %>%
# igraph::layout_with_kk(dim = 2, kkconst = igraph::vcount(.)) %>%
igraph::layout_with_dh(maxiter = 22) %>%
# igraph::layout_with_lgl() %>%
# igraph::layout_with_fr(start.temp = 1e-1) %>%
as.data.frame() %>%
magrittr::set_colnames(c("x", "y")) %>%
magrittr::set_rownames(igraph::V(graphs$ex)$name)
graphs %<>%
lapply(function(graph) {
list(graph = graph, pos = graph_layout) %>% within({
# Edges as dataframe
edges <-
graph %>%
igraph::get.edgelist() %>%
magrittr::set_colnames(c("a", "b")) %>%
as.data.frame()
# Attach segment data
segment <-
cbind(
take_rows(pos, edges$a) %>% magrittr::set_colnames(c("x", "y")),
take_rows(pos, edges$b) %>% magrittr::set_colnames(c("xend", "yend"))
) %>%
mutate(
x = x + (xend - x) * 0.02, xend = xend - (xend - x) * 0.03,
y = y + (yend - y) * 0.02, yend = yend - (yend - y) * 0.03
)
})
})
list(
slide1 = list(
alpha = list(ex = 2e-2, tf = 0.77, lr = 3e-2, ia = 3e-2),
title = "TF -> target"
),
slide2 = list(
alpha = list(ex = 2e-2, tf = 3e-2, lr = 0.77, ia = 3e-2),
title = "Ligand -> receptor"
),
slide3 = list(
alpha = list(ex = 2e-2, tf = 3e-2, lr = 3e-2, ia = 0.77),
title = "More protein-protein interaction"
),
slide4 = list(
alpha = list(ex = 0.55, tf = 3e-2, lr = 3e-2, ia = 3e-2),
title = "Co-expression"
)
) %>% lapply(function(slide) {
p <- ggplot()
p <- p +
# Co-expression segments
geom_segment(
data = graphs$ex$segment,
aes(x = x, y = y, xend = xend, yend = yend),
color = igraph::E(graphs$ex$graph)$color,
alpha = slide$alpha$ex,
size = 0.1,
linetype = if_else(igraph::E(graphs$ex$graph)$coherent, "solid", "dashed")
)
for (x in c("tf", "lr", "ia")) {
E <- igraph::E(graphs[[x]]$graph)
p <- p +
geom_curve(
data = graphs[[x]]$segment,
lineend = "butt", # linejoin = "mitre",
aes(x = x, y = y, xend = xend, yend = yend),
arrow = arrow(
length = unit(0.007, "npc"),
# activation / inhibition arrow:
angle = if_else(E$W > 0, 10, if_else(E$W < 0, 90, 0))
),
color = E$color,
alpha = slide$alpha[[x]],
size = 0.2,
curvature = 0.05,
linetype = if_else(E$coherent, "solid", "dashed")
)
}
p <- p +
# Labels
geom_text(
data = graph_layout,
aes(x = x, y = y, label = igraph::V(graphs$ex$graph)$name),
size = 2,
color = igraph::V(graphs$ex$graph)$color
)
p <- p +
theme_void() +
ggplot2::ggtitle(slide$title) +
theme(plot.title = element_text(hjust = 0.95, vjust = -3, size = 10))
return(p)
})
# graph %>%
# igraph::plot.igraph(
# edge.width = 0.7,
# edge.color = igraph::E(.)$color,
#
# vertex.size = 0,
# vertex.shape = 'none',
# #vertex.color = "black",
# vertex.label.cex = 0.4,
# vertex.label.color = vcolor[igraph::V(.)$name],
#
# layout = graph_layout
# )
}
sce_dummy %>%
`[`(, (.)$group == "healthy") %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(sce_dummy.de$table %>% filter(src == "edgeR"))




requireNamespace("visNetwork")
## Loading required namespace: visNetwork
omnipath.tf %>%
mutate(a = source_genesymbol, b = target_genesymbol) %>%
filter(a %in% get_goi_genes(), b %in% get_goi_genes()) %>%
select(a, b) %>%
igraph::graph_from_data_frame(directed = FALSE) %>%
visNetwork::toVisNetworkData() %>%
{
visNetwork::visNetwork(nodes = .$nodes, edges = .$edges, layout = "layout_with_kk") %>%
visNetwork::visPhysics(enabled = FALSE)
}
# scenic_options <-
# SCENIC::initializeScenic(org = "mgi", dbDir = path_to("data/SCENIC/*cache"), dbs = SCENIC::defaultDbNames[["mgi"]], datasetTitle = "dummy", nCores = 1)
# sce_dummy@assays@data$counts %>%
# list(counts = .) %>% with({
# counts %>%
# `[`(SCENIC::geneFiltering(., scenicOptions = scenic_options), ) %>%
# #SCENIC::runCorrelation(scenic_options) %>%
# log1p() %>%
# SCENIC::runGenie3(scenic_options)
# })
#
#
# `%dopar%` <- foreach::`%dopar%`
# foreach <- foreach::foreach
# scenic_options %>%
# SCENIC::runSCENIC_1_coexNetwork2modules() %>%
# SCENIC::runSCENIC_2_createRegulons() %>%
# SCENIC::runSCENIC_3_scoreCells(log1p(sce_dummy@assays@data$counts))
Remove unused.
rm(cco_revelio)
gene.summary$hs <- NULL
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9417197 503 16351948 873.3 16351948 873.3
## Vcells 25158507 192 42653996 325.5 35478330 270.7
RAM usage.
culprits() %>%
head(10) %>%
kable()
| size | cumsize | unit | |
|---|---|---|---|
| omnipath.tf | 66.00 | 170.0 | Mb |
| omnipath.in | 41.00 | 110.0 | Mb |
| gene.summary | 39.00 | 66.0 | Mb |
| go_info | 16.00 | 28.0 | Mb |
| omnipath.lr | 4.10 | 12.0 | Mb |
| show_coex_graph | 1.60 | 8.1 | Mb |
| sce_dummy | 0.52 | 6.5 | Mb |
| sce_dummy.go | 0.48 | 6.0 | Mb |
| goi_agg_by_group | 0.45 | 5.5 | Mb |
| heatmap | 0.37 | 5.0 | Mb |
# Collection of dataset loaders
load_dataset <- list()
# Summarizes each column in `cols`
# Call with results='asis'
tables <- function(sce, cols) {
for (col in cols) {
sce@colData %>%
as.data.frame() %>%
pull(col) %>%
table(useNA = "ifany") %>%
as.data.frame() %>%
rename_with(~col, ".") %>%
rename_with(~"#", Freq) %>%
t() %>%
kable() %>%
print()
# https://bookdown.org/yihui/rmarkdown-cookbook/kable.html#generate-multiple-tables-from-a-for-loop
# https://stackoverflow.com/questions/52631689/how-to-show-formatted-r-output-with-results-asis-in-rmarkdown
}
}
“Mouse Whole Cortex and Hippocampus 10x” dataset from Allen Brain, 2020: [explore], [download], [protocols].
The dataset contains ~1M cells. We have selected ~31k cells of type “Astro”, “Endo” and “VLMC”.
load_dataset$ab <- function(max.size = 9999) {
sce <-
make_sce(
counts = (
path_to("*/AllenBrain/Mouse-WCH*/*fewer_cells/data.*.gz") %>%
from_tsv() %>%
`[`(, sort(colnames(.)))
),
coldata = (
path_to("*/AllenBrain/Mouse-WCH*/f*cells/meta.*.gz") %>%
from_tsv() %>%
`[`(sort(rownames(.)), )
)
) %>%
# Only keep VLMC
`[`(, (.)$subclass_label == "VLMC") %>%
# Select a random subset of cells for expediency
`[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
# Drop non-expressed cells
`[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
# Order by cell type: sometimes convenient below, esp. for plotting
`[`(, order((.)$cell_type_alias_label))
# conversion to matrix takes long, hence do it here
sce@assays@data$counts %<>% as.matrix()
# alias `celltype` column
sce@colData$celltype <- sce@colData$cell_type_alias_label
sce@colData$celltype %<>% as.factor() %>% relevel(ref = "375_VLMC")
return(sce)
}
GSE98816: single cell RNA-seq of mouse brain vascular transcriptomes.
load_dataset$bh <- function(max.size = 9999) {
sce <-
make_sce(
counts = path_to("*/Betsholtz-2018/*/expr.*.gz") %>%
from_tsv() %>%
t(),
coldata = path_to("*/Betsholtz-2018/*/meta.*.gz") %>% from_tsv()
) %>%
# Only keep FB
`[`(, (.)$celltype %in% c("FB1", "FB2")) %>%
# Select a random subset of cells for expediency
`[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
# Drop non-expressed cells
`[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
# Order by cell type
`[`(, order((.)$celltype))
sce@assays@data$counts %<>% as.matrix()
sce@colData$celltype %<>% as.factor() %>% relevel(ref = "FB1")
return(sce)
}
Raw data from GSE113973 (last update: Nov 13, 2019), metadata from UCSC cell browser.
load_dataset$cb <- function(max.size = 9999) {
sce <-
make_sce(
counts = path_to("*/CasteloBranco-2018/*/expr.*.gz") %>%
from_tsv() %>%
t(),
coldata = path_to("*/CasteloBranco-2018/*/meta.*.gz") %>%
from_tsv()
) %>%
# Only keep those cells
`[`(, (.)$celltype %in% c("VLMC1", "VLMC2", "VLMC3", "VLMC4")) %>%
# Select a random subset of cells for expediency
`[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
# Drop non-expressed cells
`[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
# Order by cell type
`[`(, order((.)$celltype))
sce@assays@data$counts %<>% as.matrix()
sce@colData$Group %<>% as.factor() %>% relevel(ref = "Ctrl")
sce@colData$celltype %<>% as.factor() %>% relevel(ref = "VLMC3")
return(sce)
}
Expression data from GSE135186 (last update: Feb 10, 2021). The cell-to-cluster assignment is a private communication by D. Aran (2021-05-20). The associated paper is Dorrier-Aran-…-Daneman, 2021.
load_dataset$dm <- function(max.size = 9999) {
sce <-
make_sce(
counts = path_to("*/Daneman-2020/*/expr.*.gz") %>%
from_tsv() %>%
t(),
coldata = path_to("*/Daneman-2020/*/meta.*.gz") %>%
from_tsv()
) %>%
# Select a random subset of cells for expediency
`[`(, sample(colnames(.), size = min(ncol(.), max.size))) %>%
# Drop non-expressed cells (there shouldn't be any here)
`[`(, (.)@assays@data$counts %>% colSums() > 0) %>%
# Order samples
`[`(, order((.)$cluster))
# Drop cells without cluster (they separate in dim. red. plots)
sce %<>% `[`(, !is.na((.)$cluster))
sce@assays@data$counts %<>% as.matrix()
# Set factor reference level
sce@colData$group %<>% as.factor() %>% relevel(ref = "healthy")
sce@colData$cluster %<>% c() %>%
as.factor() %>%
relevel(ref = "0")
# Alias for consistency
sce@colData$celltype <- sce@colData$cluster
return(sce)
}
datasets <- load_dataset %>% lapply(\(loader) memo(loader)(max.size = 777))
# Note: Don't cache but memoize here
datasets %<>% lapply(dealias_sce)
## Warning in limma::alias2SymbolTable(., species = species): Multiple symbols
## ignored for one or more aliases
## Warning in is.na(.) %>% {: Dealiasing with `limma` genes resulted in 3979 NAs,
## e.g. at: Gm28402, Gm37635, Gm48945, Gm26969, AC155941.1
## Warning in limma::alias2SymbolTable(., species = species): Multiple symbols
## ignored for one or more aliases
## Warning in is.na(.) %>% {: Dealiasing with `limma` genes resulted in 137 NAs,
## e.g. at: Rims1.1, LOC102633315, AI848285, AF529169, Snord53.2
## Warning in limma::alias2SymbolTable(., species = species): Multiple symbols
## ignored for one or more aliases
## Warning in is.na(.) %>% {: Dealiasing with `limma` genes resulted in 190
## NAs, e.g. at: LOC100862268, Cypt9, Duxbl1+Duxbl2+Duxbl3, LOC101056149,
## Mir692-2b+Mir692-3
## Warning in limma::alias2SymbolTable(., species = species): Multiple symbols
## ignored for one or more aliases
## Warning in is.na(.) %>% {: Dealiasing with `limma` genes resulted in 2904 NAs,
## e.g. at: Gm37875, Gm29069, Gm42836, RP23-328F4.7, Gm43562
datasets <- datasets %>% lapply(scater_attach)
# memo: `%<>%` doesn't seem to work with cache=TRUE
gridExtra::grid.arrange(
datasets$ab %>% scater_show(color.by = "celltype", colors = colors),
datasets$ab %>% scater_show(color.by = "donor_sex_label"),
ncol = 2
)

gridExtra::grid.arrange(
datasets$bh %>% scater_show(color.by = "celltype", colors = colors),
datasets$bh %>% scater_show(color.by = "Mouse ID"),
ncol = 2
)

gridExtra::grid.arrange(
datasets$cb %>% scater_show(color.by = "celltype", colors = colors),
datasets$cb %>% scater_show(color.by = "Group", colors = colors),
ncol = 2
)

gridExtra::grid.arrange(
datasets$cb %>% scater_show(color.by = "MouseModel", colors = colors),
datasets$cb %>% scater_show(color.by = "Plate", colors = colors),
ncol = 2
)

gridExtra::grid.arrange(
datasets$dm %>% scater_show(color.by = "subgroup", colors = colors),
datasets$dm %>% scater_show(color.by = "cluster", colors = colors),
ncol = 2
)

Remove unused.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9506734 507.8 16351948 873.3 16351948 873.3
## Vcells 25446627 194.2 65995349 503.6 60589798 462.3
RAM usage.
culprits() %>%
head(10) %>%
kable()
| size | cumsize | unit | |
|---|---|---|---|
| datasets | 470.00 | 640.0 | Mb |
| omnipath.tf | 66.00 | 170.0 | Mb |
| omnipath.in | 41.00 | 110.0 | Mb |
| gene.summary | 39.00 | 67.0 | Mb |
| go_info | 16.00 | 28.0 | Mb |
| omnipath.lr | 4.10 | 12.0 | Mb |
| show_coex_graph | 1.60 | 8.3 | Mb |
| sce_dummy | 0.52 | 6.8 | Mb |
| sce_dummy.go | 0.48 | 6.2 | Mb |
| goi_agg_by_group | 0.45 | 5.8 | Mb |
The following is an approximation of Extended Data Fig 4c from Dorrier-Aran-…-Daneman, 2021.
We check whether other datasets (besides “Daneman”) cluster by their cell types using the same set of genes.
The samples are presorted by “Daneman” cluster and by condition within each cluster.
datasets$dm %>%
{
# Remove the `celltype` columns (same as `cluster`)
.@colData %<>% `[`(, colnames(.) != "celltype", drop = FALSE)
# Order samples
.[, order(paste(.$cluster, .$subgroup))]
} %>%
show_dm_heatmap(colors = colors, cluster_cols = FALSE)

The samples are clustered.
datasets$ab %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

The samples are clustered.
datasets$bh %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

The samples are clustered.
datasets$cb %>% show_dm_heatmap(colors = colors, cluster_cols = TRUE)

cco <- list(
ab = datasets$ab %>% cell_cycle(batch.by = "celltype"),
bh = datasets$bh %>% cell_cycle(batch.by = "celltype"),
cb = datasets$cb %>% cell_cycle(batch.by = "celltype"),
dm_h12 = datasets$dm %>%
.[, .$subgroup %in% c("healthy1", "healthy2")] %>%
cell_cycle(batch.by = "subgroup"),
dm_h12_0 = datasets$dm %>%
.[, .$subgroup %in% c("healthy1", "healthy2")] %>%
.[, .$cluster %in% c(0)] %>%
cell_cycle(batch.by = "subgroup")
)
## 2021-07-08 20:15:05: calculating optimal rotation: 2021-07-08 20:15:05: calculating PCA: 2021-07-08 20:15:05: assigning cell cycle phases: 2021-07-08 20:15:05: reading data: 0.32secs
## 1.82secs
## 3.17secs
## 11.02secs
## 2021-07-08 20:15:16: calculating optimal rotation: 2021-07-08 20:15:16: calculating PCA: 2021-07-08 20:15:16: assigning cell cycle phases: 2021-07-08 20:15:16: reading data: 0.17secs
## 1.51secs
## 2.02mins
## 2.34mins
## 2021-07-08 20:17:37: calculating optimal rotation: 2021-07-08 20:17:37: calculating PCA: 2021-07-08 20:17:37: assigning cell cycle phases: 2021-07-08 20:17:37: reading data: 0.2secs
## 1.84secs
## 2.35mins
## 2.53mins
## 2021-07-08 20:20:09: calculating optimal rotation: 2021-07-08 20:20:09: calculating PCA: 2021-07-08 20:20:09: assigning cell cycle phases: 2021-07-08 20:20:09: reading data: 1.51secs
## 4.93secs
## 6.62secs
## 14.95secs
## 2021-07-08 20:20:24: calculating optimal rotation: 2021-07-08 20:20:24: calculating PCA: 2021-07-08 20:20:24: assigning cell cycle phases: 2021-07-08 20:20:24: reading data: 0.2secs
## 0.86secs
## 1.28secs
## 32.22secs
cco$ab %>% cell_cycle_plot2d(colors = colors)

cco$bh %>% cell_cycle_plot2d(colors = colors)

cco$cb %>% cell_cycle_plot2d(colors = colors)

cco$dm_h12 %>% cell_cycle_plot2d(colors = colors)

cco$dm_h12_0 %>% cell_cycle_plot2d(colors = colors)

datasets$ab %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

datasets$bh %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

datasets$cb %>% show_goi_pca(goi$pvf$genes, group.by = "Group", colors = colors)

datasets$cb %>% show_goi_pca(goi$pvf$genes, group.by = "celltype", colors = colors)

datasets$dm %>% show_goi_pca(goi$pvf$genes, group.by = "subgroup", colors = colors)

datasets$dm %>% show_goi_pca(goi$pvf$genes, group.by = "cluster")

Click on the plot to scroll through the comparisons (shift-click to cycle back).
pairs.to.contrast <-
list(
list(A = goi$col$name, B = goi$air$name),
list(A = goi$glycolysis$name, B = goi$air$name),
list(A = goi$fib_mig$name, B = goi$air$name)
)
show_ab_density <- function(sce, group.by = "celltype", colors = ramp(sce@colData[[group.by]])) {
stopifnot(
all(sce@colData[[group.by]] %in% names(colors))
)
sce %<>%
scater::aggregateAcrossFeatures(
ids = symbol2goiset(rownames(.)),
average = TRUE,
use.assay.type = "logcounts"
)
pairs.to.contrast %>%
lapply(dual(function(A, B) {
data.frame(
a = sce@assays@data$logcounts[A, ],
b = sce@assays@data$logcounts[B, ],
g = sce@colData[[group.by]]
) %>% {
(.) %>%
ggplot(aes(x = (a - b), color = g)) +
labs(x = paste0(" - ", B, " + ", A)) +
labs(y = "Fraction of cells (i.e., density)") +
labs(color = group.by) +
scale_color_manual(values = colors, breaks = gglast(colour)) +
# ggtitle(paste0(A, " vs ", B)) +
geom_density(size = 3)
}
}))
}
show_ab_2d <- function(sce, group.by = "celltype", colors = ramp(sce@colData[[group.by]])) {
stopifnot(
all(sce@colData[[group.by]] %in% names(colors))
)
# Group -- for color
g <- sce@colData[[group.by]]
# Library size -- for size
s <- sce@assays@data$counts %>% colSums()
sce %<>%
scater::aggregateAcrossFeatures(
ids = symbol2goiset(rownames(.)),
average = TRUE,
use.assay.type = "logcounts"
)
pairs.to.contrast %>%
lapply(dual(function(A, B) {
data.frame(
x = sce@assays@data$logcounts[A, ],
y = sce@assays@data$logcounts[B, ],
g = g,
s = s
) %>% {
(.) %>%
ggplot(aes(x = x, y = y, color = g, size = s)) +
labs(x = A, y = B, color = group.by, size = "Library size") +
scale_color_manual(values = colors, breaks = gglast(colour)) +
geom_point(alpha = 0.6) +
# ggtitle(paste0(A, " vs ", B)) +
guides(shape = FALSE) +
geom_density_2d(size = 0.3, alpha = 0.5) +
guides(color = guide_legend(override.aes = list(alpha = 1)))
}
}))
}
datasets$ab %>% show_ab_density(group.by = "celltype", colors = colors)



datasets$ab %>% show_ab_2d(group.by = "celltype", colors = colors)
## now dyn.load("/usr/lib/R/library/MASS/libs/MASS.so") ...
## now dyn.load("/home/ra/R/x86_64-pc-linux-gnu-library/4.1/isoband/libs/isoband.so") ...



datasets$bh %>% show_ab_density(group.by = "celltype", colors = colors)



datasets$bh %>% show_ab_2d(group.by = "celltype", colors = colors)



datasets$cb %>% show_ab_density(group.by = "celltype", colors = colors)



datasets$cb %>% show_ab_2d(group.by = "celltype", colors = colors)



datasets$dm %>% show_ab_density(group.by = "cluster")



datasets$dm %>% show_ab_2d(group.by = "cluster")



datasets$dm %>% show_ab_density(group.by = "subgroup", colors = colors)



datasets$dm %>% show_ab_2d(group.by = "subgroup", colors = colors)



de <- list(
# Other/375_VLMC
ab_other_vs_375 = datasets$ab %>% compute_de(~celltype),
# FB2/FB1:
bh_fb2_vs_fb1 = datasets$bh %>% compute_de(~celltype),
# EAE/Ctrl (exclude the VLMC3 cells because they separate on PCA):
cb_eae_vs_ctrl = datasets$cb %>% `[`(, (.)$celltype != "VLMC3") %>% compute_de(~Group),
# Other/VLMC3:
cb_other_vs_vlmc3 = datasets$cb %>% compute_de(~celltype),
# EAE/Healthy:
dm_eae_vs_hthy = datasets$dm %>% compute_de(~group)
)
## Warning in DESeq2::DESeqDataSet(., design = design): 49 duplicate rownames were
## renamed by adding numbers
## Warning in DESeq2::DESeqDataSet(., design = design): 29 duplicate rownames were
## renamed by adding numbers
## Warning in DESeq2::DESeqDataSet(., design = design): 47 duplicate rownames were
## renamed by adding numbers
## Warning in DESeq2::DESeqDataSet(., design = design): 47 duplicate rownames were
## renamed by adding numbers
## Warning in DESeq2::DESeqDataSet(., design = design): 54 duplicate rownames were
## renamed by adding numbers
show_heatmap_slides <- function(sce, de_table) {
genesets <- list(
list(name = "Neuron projection", genes = go2symbols("neuron projection")),
list(name = "Cell adhesion", genes = go2symbols("cell adhesion")),
list(name = "Fibroblast growth factor", genes = go2symbols("fibroblast growth factor")),
list(name = "Smooth muscle", genes = go2symbols("smooth muscle")),
list(name = "Cell communication", genes = go2symbols("cell communication")),
list(name = "Chemotaxis", genes = go2symbols("chemotaxis")),
list(name = "Amyloid", genes = go2symbols("amyloid")),
list(name = "Insulin", genes = go2symbols("insulin")),
list(name = "Actin filament", genes = go2symbols("actin filament")),
list(name = "Apoptosis", genes = go2symbols("apoptosis")),
goi$fib_mig,
list(name = "Wounding", genes = go2symbols("wounding")),
list(name = "Inflammatory", genes = go2symbols("inflammatory")),
list(name = "Immune response", genes = go2symbols("immune response")),
goi$neuroinflammation,
goi$air,
goi$glycolysis,
goi$col,
list(name = "Cytokine activity", genes = go2symbols("^cytokine activity")),
list(name = "Chemokine activity", genes = go2symbols("^chemokine activity")),
goi$eae_common_up,
list(name = "Most-DE genes", genes = de_table$symbol, n = 33)
)
# genesets <- goi[c("glycolysis", "fib_mig")] # DEBUG
# default max number of genes to show
n <- 47
genesets %>% lapply(function(geneset) {
geneset %>%
with({
sub_de_table <- de_table %>% filter(symbol %in% genes)
if (!(any(rownames(sce) %in% sub_de_table$symbol))) {
return()
}
show_de_heatmap(
sce,
sub_de_table,
colors = colors,
cluster_cols = FALSE,
n = n,
main = name
)
}) %>%
magrittr::use_series(gtable)
})
}
de$ab_other_vs_375$table %>% show_de_table()
Click on the image to cycle through the sets (shift-click to cycle back).
show_heatmap_slides(
datasets$ab %>% .[, order(.$celltype)],
de$ab_other_vs_375$table %>% filter(src == "edgeR")
)






















de$ab_other_vs_375$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Up in 375_VLMC <-- LFC --> Up in Other")

de$ab_other_vs_375$table %>%
show_de_cmp_lfc()
## Warning: ggrepel: 30 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

de$ab_other_vs_375$table %>%
show_de_volcano() +
labs(x = "Up in 375_VLMC <-- LFC --> Up in Other", color = "Geneset")

de$bh_fb2_vs_fb1$table %>% show_de_table()
Click on the image to cycle through the sets.
show_heatmap_slides(
datasets$bh %>% .[, order(.$celltype)],
de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR")
)






















de$bh_fb2_vs_fb1$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Up in FB1 <-- LFC --> Up in FB2")

de$bh_fb2_vs_fb1$table %>%
show_de_cmp_lfc()
## Warning: ggrepel: 75 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

de$bh_fb2_vs_fb1$table %>%
show_de_volcano() +
labs(x = "Up in FB1 <-- LFC --> Up in FB2", color = "Geneset")

de$cb_other_vs_vlmc3$table %>% show_de_table()
Click on the image to cycle through the sets.
show_heatmap_slides(
datasets$cb %>% .[, order(.$celltype)],
de$cb_other_vs_vlmc3$table %>% filter(src == "edgeR")
)






















de$cb_other_vs_vlmc3$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Up in VLMC3 <-- LFC --> Up in Other")

de$cb_other_vs_vlmc3$table %>%
show_de_cmp_lfc()
## Warning: ggrepel: 27 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

de$cb_other_vs_vlmc3$table %>%
show_de_volcano() +
labs(x = "Up in VLMC3 <-- LFC --> Up in Other", color = "Geneset")

de$cb_eae_vs_ctrl$table %>% show_de_table()
Click on the image to cycle through the sets.
show_heatmap_slides(
datasets$cb %>% .[, .$celltype != "VLMC3"] %>% .[, order(.$Group)],
de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR")
)






















de$cb_eae_vs_ctrl$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Up in Ctrl <-- LFC --> Up in EAE")

de$cb_eae_vs_ctrl$table %>%
show_de_cmp_lfc()
## Warning: ggrepel: 124 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

de$cb_eae_vs_ctrl$table %>%
show_de_volcano() +
labs(x = "Up in Ctrl <-- LFC --> Up in EAE", color = "Geneset")

de$dm_eae_vs_hthy$table %>% show_de_table()
Click on the image to cycle through the sets.
datasets$dm %>%
{
# Remove the `celltype` columns (same as `cluster`)
.@colData %<>% `[`(, colnames(.) != "celltype", drop = FALSE)
# Order samples
.[, order(.$subgroup)]
} %>%
show_heatmap_slides(
de$dm_eae_vs_hthy$table %>% filter(src == "edgeR")
)






















de$dm_eae_vs_hthy$table %>%
filter(src == "edgeR") %>%
show_de_basecamp() +
labs(x = "Healthy <-- LFC --> EAE")

de$dm_eae_vs_hthy$table %>%
show_de_cmp_lfc()
## Warning: ggrepel: 145 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

de$dm_eae_vs_hthy$table %>%
show_de_volcano() +
labs(x = "Healthy <-- LFC --> EAE", color = "Geneset")

Use DE results from EdgeR.
go <-
list(n33 = 33, n77 = 77, n150 = 150) %>% lapply(function(n) {
de %>% lapply(function(de_result) {
de_result$table %>%
filter(src == "edgeR") %>%
compute_go(n = n)
})
})
go$n33$ab_other_vs_375 %>% show_go_table()
go$n33$bh_fb2_vs_fb1 %>% show_go_table()
go$n33$cb_other_vs_vlmc3 %>% show_go_table()
go$n33$cb_eae_vs_ctrl %>% show_go_table()
go$n33$dm_eae_vs_hthy %>% show_go_table()
The straight edges are the top negative (blue) and positive (red) gene-gene Spearman correlations among the “genes of interest” within each sub-dataset. The nodes are colored by condition.
The arched edges are TF-target, ligand-receptor, protein-protein interactions from OmniPath. The color is from co-expression (yellow = neutral). The line is dashed if the observed co-expression contradicts the expected effect, e.g. inhibition is expected but positive co-expression is observed.
Click on the figure to change the view; alt-click to enlarge.
datasets$ab %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))




datasets$ab %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "374_VLMC") %>%
coex() %>%
show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))




datasets$ab %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "375_VLMC") %>%
coex() %>%
show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))




datasets$ab %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "376_VLMC") %>%
coex() %>%
show_coex_graph(de$ab_other_vs_375$table %>% filter(src == "edgeR"))




datasets$bh %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))




datasets$bh %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "FB1") %>%
coex() %>%
show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))




datasets$bh %>%
take_rows(get_goi_genes()) %>%
`[`(, .$celltype == "FB2") %>%
coex() %>%
show_coex_graph(de$bh_fb2_vs_fb1$table %>% filter(src == "edgeR"))




datasets$cb %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))




datasets$cb %>%
take_rows(get_goi_genes()) %>%
`[`(, .$Group == "Ctrl") %>%
coex() %>%
show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))




datasets$cb %>%
take_rows(get_goi_genes()) %>%
`[`(, .$Group == "EAE") %>%
coex() %>%
show_coex_graph(de$cb_eae_vs_ctrl$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
`[`(, .$subgroup %in% c("healthy1", "healthy2")) %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
`[`(, .$subgroup == "healthy3") %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
`[`(, .$subgroup == "EAE1") %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




datasets$dm %>%
take_rows(get_goi_genes()) %>%
`[`(, .$subgroup == "EAE2") %>%
coex() %>%
show_coex_graph(de$dm_eae_vs_hthy$table %>% filter(src == "edgeR"))




Clean up.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9549610 510.1 16396008 875.7 16396008 875.7
## Vcells 144172195 1100.0 295234038 2252.5 295222795 2252.4
RAM usage.
culprits() %>%
head(10) %>%
kable()
| size | cumsize | unit | |
|---|---|---|---|
| datasets | 470.00 | 670.0 | Mb |
| omnipath.tf | 66.00 | 200.0 | Mb |
| omnipath.in | 41.00 | 130.0 | Mb |
| gene.summary | 39.00 | 91.0 | Mb |
| de | 17.00 | 52.0 | Mb |
| go_info | 16.00 | 35.0 | Mb |
| go | 5.80 | 20.0 | Mb |
| omnipath.lr | 4.10 | 14.0 | Mb |
| show_coex_graph | 1.60 | 9.8 | Mb |
| sce_dummy | 0.52 | 8.2 | Mb |
de_tables <- list(
a = de$ab_other_vs_375$table %>% mutate(name = "AB: Other/375_VLMC"),
b = de$bh_fb2_vs_fb1$table %>% mutate(name = "Bh: FB2/FB1"),
c1 = de$cb_other_vs_vlmc3$table %>% mutate(name = "CB: Other/VLMC3"),
c2 = de$cb_eae_vs_ctrl$table %>% mutate(name = "CB': EAE/Ctrl"),
d = de$dm_eae_vs_hthy$table %>% mutate(name = "Dm: EAE/Healthy")
)
de_tables %>%
dual(rbind) %>%
slice_sample(n = 7)
## # A tibble: 7 x 7
## symbol base lfc p src color name
## * <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 Tbx3os2 2.41 0 1 edgeR #7E801BFF CB': EAE/Ctrl
## 2 Mpi 1.24 0.597 1 DESeq2 #B27634FF CB': EAE/Ctrl
## 3 D230025D16Rik 4.42 -0.216 1 edgeR #4263F3FF Bh: FB2/FB1
## 4 Tril 7.14 -0.877 0.560 edgeR #4F76F1FF Bh: FB2/FB1
## 5 Stfa3 9.27 0 1 edgeR #35DE12FF Dm: EAE/Healthy
## 6 Hnrnpl 2.63 0.278 1 DESeq2 #9D7A2AFF CB': EAE/Ctrl
## 7 Tent5b 1.57 3.36 1 DESeq2 #E56C4CFF CB': EAE/Ctrl
datasets$combo <-
list(
ab = data.frame(
datasets$ab@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
group = "AB",
subgroup = datasets$ab$celltype
),
bh = data.frame(
datasets$bh@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
group = "Bh",
subgroup = datasets$bh$celltype
),
cb = data.frame(
datasets$cb@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
group = "CB",
subgroup = datasets$cb$Group
),
dm = data.frame(
datasets$dm@assays@data$counts %>% take_rows(get_goi_genes()) %>% t(),
group = "Dm",
subgroup = datasets$dm$subgroup
) %>% {
(.)[sample(rownames(.), size = min(nrow(.), 777)), ]
}
) %>%
lapply(function(ds) {
ds[, Reduce(intersect, lapply(., colnames))]
}) %>%
dual(rbind) %>%
{
make_sce(
(.) %>% select(-group, -subgroup) %>% t(),
(.) %>% select(group, subgroup)
)
}
print(datasets$combo)
## class: SingleCellExperiment
## dim: 333 1017
## metadata(0):
## assays(1): counts
## rownames(333): Fos Fosb ... Tpm2 Actb
## rowData names(0):
## colnames(1017): ab.CAGAATCTCTGGGCCA-L8TX_181011_01_A03
## ab.CGTCAGGTCTGTCTAT-L8TX_180115_01_G11 ... dm.EAE1_CTTCTCTTCAAAGTAG
## dm.healthy2_CCTACCATCAGCGACC
## colData names(2): group subgroup
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
Compare the gene LFC (edgeR) across datasets. Each dot is a gene with x-coordinate as the LFC in one dataset and y-coordinate as the LFC in another.
de_tables %>%
lapply(function(t) {
t %>%
group_by(name, src) %>%
mutate(lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
ungroup() %>%
filter(src == "edgeR")
}) %>%
with({
rbind(
merge(a, b, by = "symbol"),
merge(a, c1, by = "symbol"),
merge(a, c2, by = "symbol"),
merge(a, d, by = "symbol"),
merge(b, c1, by = "symbol"),
merge(b, c2, by = "symbol"),
merge(b, d, by = "symbol"),
merge(c1, d, by = "symbol"),
merge(c2, d, by = "symbol")
)
}) %>%
mutate(goi_set = symbol2goiset(symbol)) %>%
{
ggplot(., aes(x = lfc.x, y = lfc.y)) +
# Background
geom_point(alpha = 0.05, color = "black", size = 0.7) +
geom_smooth(formula = y ~ x, method = "lm", size = 0.2, color = "gray") +
# Foreground
geom_point(
data = filter(., goi_set != "Other"),
aes(color = goi_set),
stroke = 0, size = 1, alpha = 0.7
) +
# geom_smooth(data = filter(., goi_set != "Other"), formula = y ~ x, method = 'lm', size = 0.2) +
labs(x = "LFC", y = "LFC", color = "") +
# scale_color_brewer(palette = "Set1") +
facet_grid(
cols = vars(name.x), rows = vars(name.y),
scale = "free"
) +
theme(
legend.position = "bottom",
axis.text = element_blank(),
axis.ticks = element_blank(),
strip.text = element_text(size = 10),
axis.title = element_blank()
) +
guides(color = guide_legend(override.aes = list(size = 3, alpha = 1)))
}

The following table shows the top genes that are DE (one way or another according to edgeR) simultaneously in several datasets.
make_common_de_table <- function(de_tables, slicer = slice_max) {
de_tables %>%
lapply(function(t) {
t %>%
group_by(name, src) %>%
mutate(norm.lfc = ((lfc - mean(lfc)) / sd(lfc))) %>%
slicer(abs(norm.lfc), n = 777) %>%
ungroup() %>%
filter(src == "edgeR")
}) %>%
dual(rbind) %>%
as.data.frame() %>%
select(symbol, norm.lfc, name) %>%
stats::reshape(direction = "wide", idvar = "symbol", timevar = "name") %>%
# Commonly-DE first
arrange(desc(rowSums(!is.na(across(where(is.numeric)))))) %>%
# Signature of NAs
rowwise() %>%
mutate(signature = paste(across(where(is.numeric), ~ ifelse(is.na(.x), "N", "Y")), collapse = "")) %>%
ungroup() %>%
# Role: TF, Rec, Lig, etc
mutate(role = symbol2role(symbol)) %>%
# Reset rownames
as.data.frame(row.names = 1:nrow(.)) %>%
# Formatting
mutate(symbol = as.link(symbol)) %>%
mutate(across(where(is.numeric), signif, 3)) %>%
DT::datatable(escape = FALSE, rownames = TRUE)
}
de_tables %>% make_common_de_table(slicer = slice_max)
Top genes that are DE (edgeR) in EAE.
list(
cb = de$cb_eae_vs_ctrl$table,
dm = de$dm_eae_vs_hthy$table
) %>%
lapply(. %>% filter(src == "edgeR")) %>%
lapply(. %>% filter((lfc < quantile(lfc, 0.03)) | (quantile(lfc, 0.97) < lfc))) %>%
with(merge(
cb %>% select(symbol, lfc.cb = lfc),
dm %>% select(symbol, lfc.dm = lfc),
by = "symbol"
)) %>%
ggplot(aes(x = lfc.cb, y = lfc.dm)) +
labs(x = "Healthy <- LFC Daneman -> EAE") +
labs(y = "Healthy <- LFC Castelo-Branco -> EAE") +
geom_hline(yintercept = 0, alpha = 0.2) +
geom_vline(xintercept = 0, alpha = 0.2) +
geom_point(size = 1, alpha = 0.2) +
ggrepel::geom_text_repel(
aes(label = symbol),
hjust = 1.1, size = 4, show.legend = FALSE,
max.overlaps = Inf, point.size = NA,
segment.alpha = 0.2
)

datasets$grouped <-
list(
list(ds = datasets$ab, name = "AB", group.by = "celltype"),
list(ds = datasets$bh, name = "Bh", group.by = "celltype"),
list(ds = datasets$cb, name = "CB", group.by = "celltype"),
list(ds = datasets$cb, name = "CB", group.by = "Group"),
list(ds = datasets$dm, name = "Dm", group.by = "cluster"),
list(ds = datasets$dm, name = "Dm", group.by = "subgroup")
) %>%
lapply(dual(
function(ds, name, group.by) {
ds %>%
# Subset genes
take_rows(
unique(goi %>% lapply(as.data.frame) %>% dual(rbind) %>% pull(genes))
) %>%
scuttle::aggregateAcrossCells(ids = ds[[group.by]], use.assay.type = "logcounts", statistics = "mean") %>%
SingleCellExperiment::logcounts() %>%
as.data.frame() %>%
ind2col("symbol") %>%
reshape2::melt(id.vars = "symbol", variable.name = "subgroup", value.name = "expression") %>%
mutate(dataset = name, subdataset = paste0(name, " (", group.by, ")"), group = group.by) %>%
mutate(role = symbol2role(symbol))
# TODO: add description?
}
)) %>%
dual(rbind)
interesting <- c(
# Daneman clusters
c("Rbp1", "Fn1", "Igfbp7"),
# Aerobic
c("Cox3b", "Cox7c", "Bloc1s1"),
# Anaerobic glycolysis
c("Ier3"),
# Collagen
c("Col4a1", "Col4a2", "Col8a1", "Col3a1"),
# Fib. migration
c("Sdc4")
)
show_stack <- function(ds) {
ds$role
ds %>%
mutate(role = if_else(role == "", "Unknown", role)) %>%
group_by(subdataset) %>%
mutate(expression = 100 * expression / max(expression)) %>%
ungroup() %>%
mutate(
symbol = if_else(symbol %in% interesting, glue("<span style='color:red;'>{symbol}</span>"), symbol)
) %>%
mutate(color = c(ramp(0:7), colors)[as.character(subgroup)]) %>%
mutate(subgroup = glue("<span name='{subgroup}' style='color:{color};'>{subgroup}</span>")) %>%
# Append role to symbol
# mutate(symbol = paste0(symbol, " (", role, ")")) %>%
# Heatmaps
ggplot(aes(x = subgroup, y = symbol, fill = expression)) +
geom_tile() +
scale_y_discrete(limits = rev) +
scale_fill_gradient(low = "black", high = "yellow") +
labs(fill = "%") +
theme(
axis.text.x = ggtext::element_markdown(
size = 12, angle = 90, vjust = 0.5, hjust = 1
),
axis.title = element_blank(),
legend.title = element_text(size = 9),
legend.text = element_text(size = 9),
# Remove the `role` strip altogether
# strip.text.y = element_blank(),
strip.text.y = element_text(color = "black", angle = 0, size = 8),
strip.text.x = element_text(color = "black", size = 10),
) +
facet_grid(
cols = vars(subdataset),
rows = vars(role),
space = "free",
scales = "free",
labeller = label_wrap_gen(5)
) +
theme(axis.text.y = ggtext::element_markdown())
}
Click on the plot to scroll through the clusters (shift-click to cycle back).
(0:7) %>%
lapply(function(i) {
datasets$grouped %>%
filter(symbol %in% goi[[paste0("c", i)]]$genes) %>%
show_stack() +
ggtitle(paste0("Cluster #", i)) +
theme(axis.text.y = ggtext::element_markdown(size = 12))
})








datasets$grouped %>%
filter(symbol %in% goi$neuroinflammation$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 12))

datasets$grouped %>%
filter(symbol %in% goi$col$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 9))

datasets$grouped %>%
filter(symbol %in% goi$air$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 7))

datasets$grouped %>%
filter(symbol %in% goi$glycolysis$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 6))

datasets$grouped %>%
filter(symbol %in% goi$fib_mig$genes) %>%
show_stack() +
theme(axis.text.y = ggtext::element_markdown(size = 10))

For some of the gene sets, the following tabs show reduced dimension plots (PCA/TSNE/UMAP) once the combined dataset is restricted to that gene set.
colors # required below
## 374_VLMC 375_VLMC 376_VLMC FB1
## "aquamarine3" "chartreuse4" "chartreuse3" "cornflowerblue"
## FB2 Ctrl EAE VLMC1
## "blue" "chartreuse4" "coral2" "chartreuse1"
## VLMC3 VLMC4 healthy EAE1
## "coral3" "coral4" "green" "coral1"
## EAE2 healthy1 healthy2 healthy3
## "coral3" "aquamarine" "chartreuse3" "chartreuse4"
## positive negative G1.S S
## "Red4" "Steelblue1" "#E16A86" "#B88A00"
## G2 G2.M M.G1
## "#50A315" "#00AD9A" "#009ADE"
common_map <- function(sce, dimred = NULL) {
sce %<>% drop_empty()
if (is.null(dimred)) {
sce %<>% norm1() %>% scater_attach()
return(list(
PCA = common_map(sce, dimred = "PCA"),
TSNE = common_map(sce, dimred = "TSNE"),
UMAP = common_map(sce, dimred = "UMAP")
))
}
sce %>%
{
cbind(
.@colData %>% as.data.frame(),
list(
TSNE = .@int_colData$reducedDims$TSNE %>%
as.data.frame() %>%
select(x = V1, y = V2),
UMAP = .@int_colData$reducedDims$UMAP %>%
as.data.frame() %>%
select(x = V1, y = V2),
PCA = .@int_colData$reducedDims$PCA %>%
as.data.frame() %>%
select(x = PC1, y = PC2)
)[[dimred]]
)
} %>%
group_by(group) %>%
arrange(subgroup) %>%
mutate(rk = as.factor(dense_rank(subgroup))) %>%
ungroup() %>%
mutate(color = colors[as.character(subgroup)]) %>%
mutate(subgroup = paste0(group, ": ", subgroup)) %>%
{
ggplot((.), aes(x = x, y = y, color = subgroup)) +
geom_point(data = ((.) %>% select(-group)), alpha = 0.05, size = 1) +
geom_point(alpha = 0.5, size = 1.5) +
facet_wrap(. ~ group) +
scale_color_manual(values = (select(., color, subgroup) %>% pull(color, subgroup))) +
ggplot2::ggtitle(dimred) +
theme(
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
plot.title = element_text(),
) +
guides(color = guide_legend(override.aes = list(size = 3, alpha = 1)))
}
}
common_map_3d <- function(sce, color.by = "subgroup") {
sce %<>%
drop_empty() %>%
norm1() %>%
scater::logNormCounts() %>%
scater::runPCA()
data.frame(
color_group = sce@colData[[color.by]],
lib.size = sce@assays@data$counts %>% colSums(),
SingleCellExperiment::reducedDim(sce, "PCA") %>%
as.data.frame() %>%
select(PC1, PC2, PC3)
) %>%
group_by(color_group) %>%
sample(size = 33, replace = TRUE) %>%
ungroup() %>%
# plotly::plot_ly(type = "scatter3d", mode = "markers",
plotly::plot_ly() %>%
plotly::add_markers(
x = ~PC1, y = ~PC2, z = ~PC3,
marker = list(size = 2, opacity = 1),
color = ~color_group, colors = colors
) %>%
plotly::layout(
scene = list(
xaxis = list(title = "PC1", showticklabels = FALSE),
yaxis = list(title = "PC2", showticklabels = FALSE),
zaxis = list(title = "PC3", showticklabels = FALSE)
)
)
}
datasets$combo %>% common_map()



datasets$combo %>% common_map_3d()
geneset <- goi$neuroinflammation$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
geneset <- goi$col$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
geneset <- goi$air$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
geneset <- goi$glycolysis$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
geneset <- goi$fib_mig$genes
datasets$combo %>%
take_rows(geneset) %>%
common_map()



datasets$combo %>%
take_rows(geneset) %>%
common_map_3d()
For each dataset we compare the most-DE genes to those from the kidney fibroblast activation gene set SI7 (see above). The LFCs are first standardized and filtered to top absolute LFC.
de_tables_vs_si7 <-
de_tables %>%
dual(rbind) %>%
group_by(name, src) %>%
mutate(norm.lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
filter(abs(norm.lfc) >= quantile(abs(norm.lfc), 0.95)) %>%
ungroup() %>%
merge(
y = fb_genes_si7 %>%
select(lfc = logFC) %>%
ind2col("symbol") %>%
mutate(norm.lfc = (lfc - mean(lfc)) / sd(lfc)) %>%
filter(abs(norm.lfc) >= quantile(abs(norm.lfc), 0.95)) %>%
rename(norm.lfc.ref = norm.lfc),
by = "symbol",
all = TRUE
) %>%
mutate(delta = norm.lfc - norm.lfc.ref)
We only keep the LFC from edgeR.
de_tables_vs_si7 %>%
tidyr::drop_na() %>%
filter(src == "edgeR") %>%
group_by(name, src) %>%
mutate(is_out = FALSE) %>%
mutate(
is_out = is_out |
symbol %in% (slice_min(., norm.lfc, n = 5) %>% pull(symbol)) |
symbol %in% (slice_max(., norm.lfc, n = 5) %>% pull(symbol))
) %>%
mutate(
is_out = is_out |
symbol %in% (slice_min(., norm.lfc.ref, n = 5) %>% pull(symbol)) |
symbol %in% (slice_max(., norm.lfc.ref, n = 5) %>% pull(symbol))
) %>%
mutate(label = if_else(is_out, symbol, "")) %>%
ungroup() %>%
ggplot(aes(x = norm.lfc, y = norm.lfc.ref, color = name)) +
ggrepel::geom_text_repel(
aes(label = label),
hjust = 1.1, size = 3, show.legend = FALSE,
max.overlaps = Inf, point.size = NA,
segment.alpha = 0.2
) +
geom_point(size = 1, alpha = 0.3) +
geom_hline(yintercept = 0, alpha = 0.2) +
geom_vline(xintercept = 0, alpha = 0.2) +
labs(x = "Standardized LFC in Dataset", y = "Standardized LFC in Reference", color = "Dataset") +
scale_color_brewer(palette = "Set1") +
theme(
legend.title = element_text(size = 9),
legend.text = element_text(size = 9),
legend.position = "bottom"
) +
guides(color = guide_legend(override.aes = list(alpha = 1, size = 3)))

The genes that have a very different LFC (between two conditions within a dataset) from the set SI7 (see data sources) are the one with the most positive or most negative “delta”.
de_tables_vs_si7 %>%
tidyr::drop_na() %>%
select(symbol, dataset = name, src, norm.lfc, norm.lfc.ref, delta) %>%
mutate(symbol = as.link(symbol)) %>%
DT::datatable(escape = FALSE)
Clean up.
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 9609943 513.3 16396008 875.7 16396008 875.7
## Vcells 146048498 1114.3 295234038 2252.5 295222795 2252.4
RAM usage.
culprits() %>%
head(10) %>%
kable()
| size | cumsize | unit | |
|---|---|---|---|
| datasets | 470.0 | 690 | Mb |
| omnipath.tf | 66.0 | 220 | Mb |
| omnipath.in | 41.0 | 150 | Mb |
| gene.summary | 39.0 | 110 | Mb |
| de_tables | 19.0 | 73 | Mb |
| de | 17.0 | 54 | Mb |
| go_info | 16.0 | 37 | Mb |
| go | 5.8 | 22 | Mb |
| omnipath.lr | 4.1 | 16 | Mb |
| show_coex_graph | 1.6 | 12 | Mb |